ICU-20893 Line break tailorings updated to Unicode 13.

2019-11-22 11:54:17 -08:00 · 2019-11-22 11:54:17 -08:00 · 197e0239ab
commit 197e0239ab
parent 017c8b762e
17 changed files with 288 additions and 233 deletions
--- a/icu4c/source/data/brkitr/rules/line_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_cj.txt
@ -7,7 +7,7 @@
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 42 for Unicode 12.0
+#         Unicode Standard Annex #14 Revision 44 for Unicode 13.0
 #         http://www.unicode.org/reports/tr14/, with the following modification:
 #
 #         Boundaries between hyphens and following letters are suppressed when
@ -70,6 +70,13 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];

+# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 of UAX 14,
+# without a formal name. Because the ICU rules use these expressions in more than one place,
+# each is given a single named definition here.
+
+$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+
 # By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
 #         list it in the numerous rules that use CM.
 # By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
@ -109,7 +116,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM];       # Bases that can't take CMs
 # AL_FOLLOW  set of chars that can unconditionally follow an AL
 #            Needed in rules where stand-alone $CM s are treated as AL.
 #
-$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
+$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];


 #
@ -212,7 +219,7 @@ $OP $CM* $SP+ $CM+ $AL_FOLLOW?;    # by rule 10, stand-alone CM behaves as AL
 #        See issue ICU-20303


-$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $HY $NS $ALPlus $HL];
+$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $HY $NS $ALPlus $HL $IN];
 $SP $IS           / [^ $CanFollowIS $NU $CM];
 $SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];

@ -283,16 +290,13 @@ $HL $CM* ($HY | $BA) $CM* [^$CB]?;
 # (break between HL and SY already disallowed by LB 13 above)
 $SY $CM* $HL;

-# LB 22
-($ALPlus | $HL) $CM* $IN;
-^$CM+    $IN;     #  by rule 10, any otherwise unattached CM behaves as AL
-$EX $CM*    $IN;
-($ID | $EB | $EM) $CM*  $IN;
-$IN $CM*    $IN;
-$NU $CM*    $IN;
+# LB 22  Do not break before ellipses
+#
+$LB20NonBreaks $CM*    $IN;
+^$CM+ $IN;     #  by rule 10, any otherwise unattached CM behaves as AL


-# $LB 23
+# LB 23
 #
 ($ALPlus | $HL) $CM* $NU;
 ^$CM+  $NU;       # Rule 10, any otherwise unattached CM behaves as AL
@ -338,15 +342,15 @@ $PR $CM* ($JL | $JV | $JT | $H2 | $H3);
 $IS $CM* ($ALPlus | $HL);

 # LB 30
-($ALPlus | $HL | $NU) $CM* $OP;
-^$CM+ $OP;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
-$CP $CM* ($ALPlus | $HL | $NU);
+($ALPlus | $HL | $NU) $CM* $OP30;
+^$CM+ $OP30;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
+$CP30 $CM* ($ALPlus | $HL | $NU);

 # LB 30a  Do not break between regional indicators. Break after pairs of them.
 #         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
-$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
-$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
-$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $ZWJ {eof}];
+$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
+$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
+$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $ZWJ {eof}];
 # note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
 #       because of the chain-out behavior difference. The rule must chain out only from the [set characters],
 #       not from the preceding $RI or $CM, which it would be able to do if the set were optional.
--- a/icu4c/source/data/brkitr/rules/line_loose.txt
+++ b/icu4c/source/data/brkitr/rules/line_loose.txt
@ -8,7 +8,7 @@
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 42 for Unicode 12.0
+#         Unicode Standard Annex #14 Revision 44 for Unicode 13.0
 #         http://www.unicode.org/reports/tr14/, with the following modification:
 #
 #         Boundaries between hyphens and following letters are suppressed when
@ -76,6 +76,13 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];

+# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 of UAX 14,
+# without a formal name. Because the ICU rules use these expressions in more than one place,
+# each is given a single named definition here.
+
+$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+
 # By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
 #         list it in the numerous rules that use CM.
 # By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
@ -115,7 +122,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM];       # Bases that can't take CMs
 # AL_FOLLOW  set of chars that can unconditionally follow an AL
 #            Needed in rules where stand-alone $CM s are treated as AL.
 #
-$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
+$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];


 #
@ -218,7 +225,7 @@ $OP $CM* $SP+ $CM+ $AL_FOLLOW?;    # by rule 10, stand-alone CM behaves as AL
 #        See issue ICU-20303


-$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $HY $NS $ALPlus $HL];
+$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $HY $NS $ALPlus $HL $IN];
 $SP $IS           / [^ $CanFollowIS $NU $CM];
 $SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];

@ -292,16 +299,14 @@ $HL $CM* ($HY | $BA) $CM* [^$CB]?;
 # (break between HL and SY already disallowed by LB 13 above)
 $SY $CM* $HL;

-# LB 22
-($ALPlus | $HL) $CM* $IN;
-^$CM+    $IN;     #  by rule 10, any otherwise unattached CM behaves as AL
-$EX $CM*    $IN;
-($ID | $EB | $EM) $CM*  $IN;
-# $IN $CM*    $IN;  # delete this rule for CSS loose
-$NU $CM*    $IN;
+
+# LB 22  Do not break before ellipses
+#
+[$LB20NonBreaks - $IN] $CM*    $IN;    # line_loose tailoring
+^$CM+ $IN;     #  by rule 10, any otherwise unattached CM behaves as AL


-# $LB 23
+# LB 23
 #
 ($ALPlus | $HL) $CM* $NU;
 ^$CM+  $NU;       # Rule 10, any otherwise unattached CM behaves as AL
@ -347,15 +352,15 @@ $PR $CM* ($JL | $JV | $JT | $H2 | $H3);
 $IS $CM* ($ALPlus | $HL);

 # LB 30
-($ALPlus | $HL | $NU) $CM* $OP;
-^$CM+ $OP;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
-$CP $CM* ($ALPlus | $HL | $NU);
+($ALPlus | $HL | $NU) $CM* $OP30;
+^$CM+ $OP30;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
+$CP30 $CM* ($ALPlus | $HL | $NU);

 # LB 30a  Do not break between regional indicators. Break after pairs of them.
 #         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
-$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
-$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
-$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $ZWJ {eof}];
+$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
+$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
+$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $ZWJ {eof}];
 # note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
 #       because of the chain-out behavior difference. The rule must chain out only from the [set characters],
 #       not from the preceding $RI or $CM, which it would be able to do if the set were optional.
--- a/icu4c/source/data/brkitr/rules/line_loose_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_loose_cj.txt
@ -7,7 +7,7 @@
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 42 for Unicode 12.0
+#         Unicode Standard Annex #14 Revision 44 for Unicode 13.0
 #         http://www.unicode.org/reports/tr14/, with the following modification:
 #
 #         Boundaries between hyphens and following letters are suppressed when
@ -87,6 +87,13 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];

+# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 of UAX 14,
+# without a formal name. Because the ICU rules use these expressions in more than one place,
+# each is given a single named definition here.
+
+$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+
 # By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
 #         list it in the numerous rules that use CM.
 # By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
@ -126,7 +133,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM];       # Bases that can't take CMs
 # AL_FOLLOW  set of chars that can unconditionally follow an AL
 #            Needed in rules where stand-alone $CM s are treated as AL.
 #
-$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP $QU $BA $HY $NS $IN $NU $PR $PO $POX $ALPlus];
+$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $POX $ALPlus];


 #
@ -229,7 +236,7 @@ $OP $CM* $SP+ $CM+ $AL_FOLLOW?;    # by rule 10, stand-alone CM behaves as AL
 #        See issue ICU-20303


-$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $HY $NS $ALPlus $HL];
+$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $HY $NS $ALPlus $HL $IN];
 $SP $IS           / [^ $CanFollowIS $NU $CM];
 $SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];

@ -303,16 +310,14 @@ $HL $CM* ($HY | $BA | $BAX) $CM* [^$CB]?;
 # (break between HL and SY already disallowed by LB 13 above)
 $SY $CM* $HL;

-# LB 22
-($ALPlus | $HL) $CM* $IN;
-^$CM+    $IN;     #  by rule 10, any otherwise unattached CM behaves as AL
-$EX $CM*    $IN;
-($ID | $EB | $EM) $CM*  $IN;
-# $IN $CM*    $IN;  # delete this rule for CSS loose
-$NU $CM*    $IN;
+
+# LB 22  Do not break before ellipses
+#
+[$LB20NonBreaks - $IN] $CM*    $IN;    # line_loose tailoring
+^$CM+ $IN;     #  by rule 10, any otherwise unattached CM behaves as AL


-# $LB 23
+# LB 23
 #
 ($ALPlus | $HL) $CM* $NU;
 ^$CM+  $NU;       # Rule 10, any otherwise unattached CM behaves as AL
@ -362,15 +367,15 @@ $PR $CM* ($JL | $JV | $JT | $H2 | $H3);
 $IS $CM* ($ALPlus | $HL);

 # LB 30
-($ALPlus | $HL | $NU) $CM* $OP;
-^$CM+ $OP;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
-$CP $CM* ($ALPlus | $HL | $NU);
+($ALPlus | $HL | $NU) $CM* $OP30;
+^$CM+ $OP30;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
+$CP30 $CM* ($ALPlus | $HL | $NU);

 # LB 30a  Do not break between regional indicators. Break after pairs of them.
 #         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
-$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
-$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
-$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $ZWJ {eof}];
+$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
+$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
+$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $ZWJ {eof}];
 # note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
 #       because of the chain-out behavior difference. The rule must chain out only from the [set characters],
 #       not from the preceding $RI or $CM, which it would be able to do if the set were optional.
--- a/icu4c/source/data/brkitr/rules/line_normal.txt
+++ b/icu4c/source/data/brkitr/rules/line_normal.txt
@ -7,7 +7,7 @@
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 42 for Unicode 12.0
+#         Unicode Standard Annex #14 Revision 44 for Unicode 13.0
 #         http://www.unicode.org/reports/tr14/, with the following modification:
 #
 #         Boundaries between hyphens and following letters are suppressed when
@ -71,6 +71,13 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];

+# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 of UAX 14,
+# without a formal name. Because the ICU rules use these expressions in more than one place,
+# each is given a single named definition here.
+
+$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+
 # By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
 #         list it in the numerous rules that use CM.
 # By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
@ -110,7 +117,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM];       # Bases that can't take CMs
 # AL_FOLLOW  set of chars that can unconditionally follow an AL
 #            Needed in rules where stand-alone $CM s are treated as AL.
 #
-$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
+$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];


 #
@ -213,7 +220,7 @@ $OP $CM* $SP+ $CM+ $AL_FOLLOW?;    # by rule 10, stand-alone CM behaves as AL
 #        See issue ICU-20303


-$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $HY $NS $ALPlus $HL];
+$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $HY $NS $ALPlus $HL $IN];
 $SP $IS           / [^ $CanFollowIS $NU $CM];
 $SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];

@ -284,16 +291,13 @@ $HL $CM* ($HY | $BA) $CM* [^$CB]?;
 # (break between HL and SY already disallowed by LB 13 above)
 $SY $CM* $HL;

-# LB 22
-($ALPlus | $HL) $CM* $IN;
-^$CM+    $IN;     #  by rule 10, any otherwise unattached CM behaves as AL
-$EX $CM*    $IN;
-($ID | $EB | $EM) $CM*  $IN;
-$IN $CM*    $IN;
-$NU $CM*    $IN;
+# LB 22  Do not break before ellipses
+#
+$LB20NonBreaks $CM*    $IN;
+^$CM+ $IN;     #  by rule 10, any otherwise unattached CM behaves as AL


-# $LB 23
+# LB 23
 #
 ($ALPlus | $HL) $CM* $NU;
 ^$CM+  $NU;       # Rule 10, any otherwise unattached CM behaves as AL
@ -339,15 +343,15 @@ $PR $CM* ($JL | $JV | $JT | $H2 | $H3);
 $IS $CM* ($ALPlus | $HL);

 # LB 30
-($ALPlus | $HL | $NU) $CM* $OP;
-^$CM+ $OP;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
-$CP $CM* ($ALPlus | $HL | $NU);
+($ALPlus | $HL | $NU) $CM* $OP30;
+^$CM+ $OP30;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
+$CP30 $CM* ($ALPlus | $HL | $NU);

 # LB 30a  Do not break between regional indicators. Break after pairs of them.
 #         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
-$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
-$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
-$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $ZWJ {eof}];
+$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
+$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
+$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $ZWJ {eof}];
 # note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
 #       because of the chain-out behavior difference. The rule must chain out only from the [set characters],
 #       not from the preceding $RI or $CM, which it would be able to do if the set were optional.
--- a/icu4c/source/data/brkitr/rules/line_normal_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_normal_cj.txt
@ -7,7 +7,7 @@
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 42 for Unicode 12.0
+#         Unicode Standard Annex #14 Revision 44 for Unicode 13.0
 #         http://www.unicode.org/reports/tr14/, with the following modification:
 #
 #         Boundaries between hyphens and following letters are suppressed when
@ -75,6 +75,13 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];

+# OP30 and CP30 are variants of OP and CP that appear in-line in rule LB30 of UAX 14,
+# without a formal name. Because the ICU rules use these expressions in more than one place,
+# each is given a single named definition here.
+
+$OP30 = [$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+$CP30 = [$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+
 # By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
 #         list it in the numerous rules that use CM.
 # By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
@ -114,7 +121,7 @@ $CANT_CM = [ $SP $BK $CR $LF $NL $ZW $CM];       # Bases that can't take CMs
 # AL_FOLLOW  set of chars that can unconditionally follow an AL
 #            Needed in rules where stand-alone $CM s are treated as AL.
 #
-$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];
+$AL_FOLLOW      = [$BK $CR $LF $NL $ZW $SP $CL $CP $EX $HL $IS $SY $WJ $GL $OP30 $QU $BA $HY $NS $IN $NU $PR $PO $ALPlus];


 #
@ -217,7 +224,7 @@ $OP $CM* $SP+ $CM+ $AL_FOLLOW?;    # by rule 10, stand-alone CM behaves as AL
 #        See issue ICU-20303


-$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $HY $NS $ALPlus $HL];
+$CanFollowIS = [$BK $CR $LF $NL $SP $ZW $WJ $GL $CL $CP $EX $IS $SY $QU $BA $HY $NS $ALPlus $HL $IN];
 $SP $IS           / [^ $CanFollowIS $NU $CM];
 $SP $IS $CM* $CMX / [^ $CanFollowIS $NU $CM];

@ -291,16 +298,13 @@ $HL $CM* ($HY | $BA | $BAX) $CM* [^$CB]?;
 # (break between HL and SY already disallowed by LB 13 above)
 $SY $CM* $HL;

-# LB 22
-($ALPlus | $HL) $CM* $IN;
-^$CM+    $IN;     #  by rule 10, any otherwise unattached CM behaves as AL
-$EX $CM*    $IN;
-($ID | $EB | $EM) $CM*  $IN;
-$IN $CM*    $IN;
-$NU $CM*    $IN;
+# LB 22  Do not break before ellipses
+#
+$LB20NonBreaks $CM*    $IN;
+^$CM+ $IN;     #  by rule 10, any otherwise unattached CM behaves as AL


-# $LB 23
+# LB 23
 #
 ($ALPlus | $HL) $CM* $NU;
 ^$CM+  $NU;       # Rule 10, any otherwise unattached CM behaves as AL
@ -346,15 +350,15 @@ $PR $CM* ($JL | $JV | $JT | $H2 | $H3);
 $IS $CM* ($ALPlus | $HL);

 # LB 30
-($ALPlus | $HL | $NU) $CM* $OP;
-^$CM+ $OP;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
-$CP $CM* ($ALPlus | $HL | $NU);
+($ALPlus | $HL | $NU) $CM* $OP30;
+^$CM+ $OP30;         # The $CM+ is from rule 10, an unattached CM is treated as AL.
+$CP30 $CM* ($ALPlus | $HL | $NU);

 # LB 30a  Do not break between regional indicators. Break after pairs of them.
 #         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
-$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
-$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
-$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $ZWJ {eof}];
+$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
+$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $CM]];
+$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $IN $ZWJ {eof}];
 # note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
 #       because of the chain-out behavior difference. The rule must chain out only from the [set characters],
 #       not from the preceding $RI or $CM, which it would be able to do if the set were optional.
--- a/icu4c/source/test/testdata/break_rules/line.txt
+++ b/icu4c/source/test/testdata/break_rules/line.txt
@ -6,7 +6,14 @@
 # file: line.txt
 #
 # Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
-# Rules derived from Unicode Standard Annex #14 Revision 44 for Unicode 13.0
+# Rules derived from Unicode Standard Annex #14 Revision 44 for Unicode 13.0,
+# with the following modification:
+#
+#         Boundaries between hyphens and following letters are suppressed when
+#         there is a boundary preceding the hyphen. See rule 20.9
+#
+#         This corresponds to CSS line-break=strict (BCP47 -u-lb-strict).
+#         It sets characters of class CJ to behave like NS.
 #
 # Note: Rule syntax and the monkey test itself are still a work in progress.
 #       They are expected to change with review and the addition of support for rule tailoring.
@ -172,7 +179,7 @@ LB21.2:      BB CM* [^CM CB];

 LB21b:       SY CM* HL;

-LB22.2:       . CM* IN;
+LB22:        . CM* IN;

 LB23.1:      (AL | HL | CM) CM* NU;
 LB23.2:      NU CM* (AL | HL);
--- a/icu4c/source/test/testdata/break_rules/line_cj.txt
+++ b/icu4c/source/test/testdata/break_rules/line_cj.txt
@ -6,7 +6,7 @@
 # file: line.txt
 #
 # Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
-# Rules derived from Unicode Standard Annex #14 Revision 40 for Unicode 11.0
+# Rules derived from Unicode Standard Annex #14 Revision 44 for Unicode 13.0
 #
 # Note: Rule syntax and the monkey test itself are still a work in progress.
 #       They are expected to change with review and the addition of support for rule tailoring.
@ -61,6 +61,13 @@ XX = [:LineBreak =  Unknown:];
 ZW = [:LineBreak =  ZWSpace:];
 ZWJ = [:LineBreak =  ZWJ:];

+# OP30 and CP30 are variants of OP and CP that appear in rule LB30 from UAX 14.
+# Limitations of this monkey test rule parser require that these definitions be pulled out
+# rather than appearing in-line in LB 30.
+
+OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+
 # LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
 AL = [AL AI SG XX ];
 dictionary = SA;
@ -165,11 +179,7 @@ LB21.2:      BB CM* [^CM CB];

 LB21b:       SY CM* HL;

-LB22.1:        (AL | HL | CM) CM* IN;   # The CM is from LB10, treat an unattached CM as AL.
-LB22.2:       EX CM* IN;
-LB22.3:       (ID | EB | EM) CM* IN;
-LB22.4:       IN CM* IN;
-LB22.5:       NU CM* IN;
+LB22:        . CM* IN;

 LB23.1:      (AL | HL | CM) CM* NU;
 LB23.2:      NU CM* (AL | HL);
@ -196,13 +206,13 @@ LB28:        (AL | HL | CM)CM* (AL | HL);
 LB29:        IS CM* (AL | HL);

 # LB30  is adjusted for unattached leading CM being treated as AL.
-LB30.1:      (AL | CM | HL | NU) CM* OP;
-LB30.2:      CP CM* (AL | HL | NU);
+LB30.1:      (AL | CM | HL | NU) CM* OP30;
+LB30.2:      CP30 CM* (AL | HL | NU);

 # LB30a  keep pairs of RI together.
-LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS ZWJ]?;
+LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;

 # LB30b Do not break between Emoji Base and Emoji Modifier
 LB30b:       EB CM* EM;
--- a/icu4c/source/test/testdata/break_rules/line_loose.txt
+++ b/icu4c/source/test/testdata/break_rules/line_loose.txt
@ -6,7 +6,7 @@
 #  file:  line_loose.txt
 #
 # Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
-# Rules derived from Unicode Standard Annex #14 Revision 40 for Unicode 11.0
+# Rules derived from Unicode Standard Annex #14 Revision 44 for Unicode 13.0
 #
 # Note: Rule syntax and the monkey test itself are still a work in progress.
 #       They are expected to change with review and the addition of support for rule tailoring.
@ -69,6 +69,13 @@ XX = [:LineBreak =  Unknown:];
 ZW = [:LineBreak =  ZWSpace:];
 ZWJ = [:LineBreak =  ZWJ:];

+# OP30 and CP30 are variants of OP and CP that appear in rule LB30 from UAX 14.
+# Limitations of this monkey test rule parser require that these definitions be pulled out
+# rather than appearing in-line in LB 30.
+
+OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+
 # LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
 AL = [AL AI SG XX ];
 dictionary = SA;
@ -173,11 +180,8 @@ LB21.2:      BB CM* [^CM CB];

 LB21b:       SY CM* HL;

-LB22.1:        (AL | HL | CM) CM* IN;   # The CM is from LB10, treat an unattached CM as AL.
-LB22.2:       EX CM* IN;
-LB22.3:       (ID | EB | EM) CM* IN;
-# LB22.4:       IN CM* IN;  # delete this rule for CSS loose.
-LB22.5:       NU CM* IN;
+
+LB22:        [^IN] CM* IN;   # For CSS Loose, allow breaks between adjacent ellipsis characters.

 LB23.1:      (AL | HL | CM) CM* NU;
 LB23.2:      NU CM* (AL | HL);
@ -204,13 +208,13 @@ LB28:        (AL | HL | CM)CM* (AL | HL);
 LB29:        IS CM* (AL | HL);

 # LB30  is adjusted for unattached leading CM being treated as AL.
-LB30.1:      (AL | CM | HL | NU) CM* OP;
-LB30.2:      CP CM* (AL | HL | NU);
+LB30.1:      (AL | CM | HL | NU) CM* OP30;
+LB30.2:      CP30 CM* (AL | HL | NU);

 # LB30a  keep pairs of RI together.
-LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS ZWJ]?;
+LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;

 # LB30b Do not break between Emoji Base and Emoji Modifier
 LB30b:       EB CM* EM;
--- a/icu4c/source/test/testdata/break_rules/line_loose_cj.txt
+++ b/icu4c/source/test/testdata/break_rules/line_loose_cj.txt
@ -6,16 +6,15 @@
 #  file:  line_loose_cj.txt
 #
 # Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
-# Rules derived from Unicode Standard Annex #14 Revision 40 for Unicode 11.0
+# Rules derived from Unicode Standard Annex #14 Revision 44 for Unicode 13.0
 #
 # Note: Rule syntax and the monkey test itself are still a work in progress.
 #       They are expected to change with review and the addition of support for rule tailoring.
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 34 for Unicode 8.0
-#         http://www.unicode.org/reports/tr14/
-#         tailored as noted in 2nd paragraph below..
+#         Unicode Standard Annex #14
+#         http://www.unicode.org/reports/tr14/, tailored as noted below.
 #
 #         This tailors the line break behavior to correspond to CSS
 #         line-break=loose (BCP47 -u-lb-loose) as defined for Chinese & Japanese.
@ -87,6 +86,13 @@ XX = [:LineBreak =  Unknown:];
 ZW = [:LineBreak =  ZWSpace:];
 ZWJ = [:LineBreak =  ZWJ:];

+# OP30 and CP30 are variants of OP and CP that appear in rule LB30 from UAX 14.
+# Limitations of this monkey test rule parser require that these definitions be pulled out
+# rather than appearing in-line in LB 30.
+
+OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+
 # LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
 AL = [AL AI SG XX ];
 dictionary = SA;
@ -196,11 +202,8 @@ LB21.2:      BB CM* [^CM CB];

 LB21b:       SY CM* HL;

-LB22.1:        (AL | HL | CM) CM* IN;   # The CM is from LB10, treat an unattached CM as AL.
-LB22.2:       EX CM* IN;
-LB22.3:       (ID | EB | EM) CM* IN;
-# LB22.4:       IN CM* IN;  # delete this rule for CSS loose.
-LB22.5:       NU CM* IN;
+
+LB22:        [^IN] CM* IN;   # For CSS Loose, allow breaks between adjacent ellipsis characters.

 LB23.1:      (AL | HL | CM) CM* NU;
 LB23.2:      NU CM* (AL | HL);
@ -227,13 +230,13 @@ LB28:        (AL | HL | CM)CM* (AL | HL);
 LB29:        IS CM* (AL | HL);

 # LB30  is adjusted for unattached leading CM being treated as AL.
-LB30.1:      (AL | CM | HL | NU) CM* OP;
-LB30.2:      CP CM* (AL | HL | NU);
+LB30.1:      (AL | CM | HL | NU) CM* OP30;
+LB30.2:      CP30 CM* (AL | HL | NU);

 # LB30a  keep pairs of RI together.
-LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS ZWJ]?;
+LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;

 # LB30b Do not break between Emoji Base and Emoji Modifier
 LB30b:       EB CM* EM;
--- a/icu4c/source/test/testdata/break_rules/line_normal.txt
+++ b/icu4c/source/test/testdata/break_rules/line_normal.txt
@ -6,20 +6,15 @@
 # file: line_normal.txt
 #
 # Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
-# Rules derived from Unicode Standard Annex #14 Revision 40 for Unicode 11.0
+# Rules derived from Unicode Standard Annex #14 Revision 44 for Unicode 13.0
 #
 # Note: Rule syntax and the monkey test itself are still a work in progress.
 #       They are expected to change with review and the addition of support for rule tailoring.
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 34 for Unicode 8.0
-#         http://www.unicode.org/reports/tr14/
-#         tailored as noted in 2nd paragraph below.
-#
-#         TODO:  Rule LB 8 remains as it was in Unicode 5.2
-#         This is only because of a limitation of ICU break engine implementation,
-#         not because the older behavior is desirable.
+#         Unicode Standard Annex #14
+#         http://www.unicode.org/reports/tr14/, tailored as noted below.
 #
 #         This tailors the line break behavior to correspond to CSS
 #         line-break=normal (BCP47 -u-lb-normal) as defined for languages other than
@ -75,6 +70,13 @@ XX = [:LineBreak =  Unknown:];
 ZW = [:LineBreak =  ZWSpace:];
 ZWJ = [:LineBreak =  ZWJ:];

+# OP30 and CP30 are variants of OP and CP that appear in rule LB30 from UAX 14.
+# Limitations of this monkey test rule parser require that these definitions be pulled out
+# rather than appearing in-line in LB 30.
+
+OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+
 # LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
 AL = [AL AI SG XX ];
 dictionary = SA;
@ -179,11 +181,7 @@ LB21.2:      BB CM* [^CM CB];

 LB21b:       SY CM* HL;

-LB22.1:        (AL | HL | CM) CM* IN;   # The CM is from LB10, treat an unattached CM as AL.
-LB22.2:       EX CM* IN;
-LB22.3:       (ID | EB | EM) CM* IN;
-LB22.4:       IN CM* IN;
-LB22.5:       NU CM* IN;
+LB22:        . CM* IN;

 LB23.1:      (AL | HL | CM) CM* NU;
 LB23.2:      NU CM* (AL | HL);
@ -210,13 +208,13 @@ LB28:        (AL | HL | CM)CM* (AL | HL);
 LB29:        IS CM* (AL | HL);

 # LB30  is adjusted for unattached leading CM being treated as AL.
-LB30.1:      (AL | CM | HL | NU) CM* OP;
-LB30.2:      CP CM* (AL | HL | NU);
+LB30.1:      (AL | CM | HL | NU) CM* OP30;
+LB30.2:      CP30 CM* (AL | HL | NU);

 # LB30a  keep pairs of RI together.
-LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS ZWJ]?;
+LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;

 # LB30b Do not break between Emoji Base and Emoji Modifier
 LB30b:       EB CM* EM;
--- a/icu4c/source/test/testdata/break_rules/line_normal_cj.txt
+++ b/icu4c/source/test/testdata/break_rules/line_normal_cj.txt
@ -6,20 +6,15 @@
 #  file:  line_normal_cj.txt
 #
 # Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
-# Rules derived from Unicode Standard Annex #14 Revision 40 for Unicode 11.0
+# Rules derived from Unicode Standard Annex #14 Revision 44 for Unicode 13.0
 #
 # Note: Rule syntax and the monkey test itself are still a work in progress.
 #       They are expected to change with review and the addition of support for rule tailoring.
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 34 for Unicode 8.0
-#         http://www.unicode.org/reports/tr14/
-#         tailored as noted in 2nd paragraph below.
-#
-#         TODO:  Rule LB 8 remains as it was in Unicode 5.2
-#         This is only because of a limitation of ICU break engine implementation,
-#         not because the older behavior is desirable.
+#         Unicode Standard Annex #14
+#         http://www.unicode.org/reports/tr14/, tailored as noted below.
 #
 #         This tailors the line break behavior to correspond to CSS
 #         line-break=normal (BCP47 -u-lb-normal) as defined for Chinese & Japanese.
@ -78,6 +73,13 @@ XX = [:LineBreak =  Unknown:];
 ZW = [:LineBreak =  ZWSpace:];
 ZWJ = [:LineBreak =  ZWJ:];

+# OP30 and CP30 are variants of OP and CP that appear in rule LB30 from UAX 14.
+# Limitations of this monkey test rule parser require that these definitions be pulled out
+# rather than appearing in-line in LB 30.
+
+OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+
 # LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
 AL = [AL AI SG XX ];
 dictionary = SA;
@ -188,11 +190,7 @@ LB21.2:      BB CM* [^CM CB];

 LB21b:       SY CM* HL;

-LB22.1:        (AL | HL | CM) CM* IN;   # The CM is from LB10, treat an unattached CM as AL.
-LB22.2:       EX CM* IN;
-LB22.3:       (ID | EB | EM) CM* IN;
-LB22.4:       IN CM* IN;
-LB22.5:       NU CM* IN;
+LB22:        . CM* IN;

 LB23.1:      (AL | HL | CM) CM* NU;
 LB23.2:      NU CM* (AL | HL);
@ -218,13 +216,13 @@ LB28:        (AL | HL | CM)CM* (AL | HL);
 LB29:        IS CM* (AL | HL);

 # LB30  is adjusted for unattached leading CM being treated as AL.
-LB30.1:      (AL | CM | HL | NU) CM* OP;
-LB30.2:      CP CM* (AL | HL | NU);
+LB30.1:      (AL | CM | HL | NU) CM* OP30;
+LB30.2:      CP30 CM* (AL | HL | NU);

 # LB30a  keep pairs of RI together.
-LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS ZWJ]?;
+LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;

 # LB30b Do not break between Emoji Base and Emoji Modifier
 LB30b:       EB CM* EM;
--- a/icu4j/main/shared/data/icudata.jar
+++ b/icu4j/main/shared/data/icudata.jar
@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96831df582da28121b19cf3faed7e84529ad0c1113b14cad0e01fabd4875c679
-size 12998991
+oid sha256:f9b73d720421a85704fc64aa0949c94d52e450a44af96c715881e9e6ab0fa3e6
+size 12998988
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/line_cj.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/line_cj.txt
@ -6,7 +6,7 @@
 # file: line.txt
 #
 # Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
-# Rules derived from Unicode Standard Annex #14 Revision 40 for Unicode 11.0
+# Rules derived from Unicode Standard Annex #14 Revision 44 for Unicode 13.0
 #
 # Note: Rule syntax and the monkey test itself are still a work in progress.
 #       They are expected to change with review and the addition of support for rule tailoring.
@ -61,6 +61,13 @@ XX = [:LineBreak =  Unknown:];
 ZW = [:LineBreak =  ZWSpace:];
 ZWJ = [:LineBreak =  ZWJ:];

+# OP30 and CP30 are variants of OP and CP that appear in rule LB30 from UAX 14.
+# Limitations of this monkey test rule parser require that these definitions be pulled out
+# rather than appearing in-line in LB 30.
+
+OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+
 # LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
 AL = [AL AI SG XX ];
 dictionary = SA;
@ -165,11 +172,7 @@ LB21.2:      BB CM* [^CM CB];

 LB21b:       SY CM* HL;

-LB22.1:        (AL | HL | CM) CM* IN;   # The CM is from LB10, treat an unattached CM as AL.
-LB22.2:       EX CM* IN;
-LB22.3:       (ID | EB | EM) CM* IN;
-LB22.4:       IN CM* IN;
-LB22.5:       NU CM* IN;
+LB22:        . CM* IN;

 LB23.1:      (AL | HL | CM) CM* NU;
 LB23.2:      NU CM* (AL | HL);
@ -196,13 +199,13 @@ LB28:        (AL | HL | CM)CM* (AL | HL);
 LB29:        IS CM* (AL | HL);

 # LB30  is adjusted for unattached leading CM being treated as AL.
-LB30.1:      (AL | CM | HL | NU) CM* OP;
-LB30.2:      CP CM* (AL | HL | NU);
+LB30.1:      (AL | CM | HL | NU) CM* OP30;
+LB30.2:      CP30 CM* (AL | HL | NU);

 # LB30a  keep pairs of RI together.
-LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS ZWJ]?;
+LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;

 # LB30b Do not break between Emoji Base and Emoji Modifier
 LB30b:       EB CM* EM;
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/line_loose.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/line_loose.txt
@ -6,7 +6,7 @@
 #  file:  line_loose.txt
 #
 # Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
-# Rules derived from Unicode Standard Annex #14 Revision 40 for Unicode 11.0
+# Rules derived from Unicode Standard Annex #14 Revision 44 for Unicode 13.0
 #
 # Note: Rule syntax and the monkey test itself are still a work in progress.
 #       They are expected to change with review and the addition of support for rule tailoring.
@ -69,6 +69,13 @@ XX = [:LineBreak =  Unknown:];
 ZW = [:LineBreak =  ZWSpace:];
 ZWJ = [:LineBreak =  ZWJ:];

+# OP30 and CP30 are variants of OP and CP that appear in rule LB30 from UAX 14.
+# Limitations of this monkey test rule parser require that these definitions be pulled out
+# rather than appearing in-line in LB 30.
+
+OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+
 # LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
 AL = [AL AI SG XX ];
 dictionary = SA;
@ -173,11 +180,8 @@ LB21.2:      BB CM* [^CM CB];

 LB21b:       SY CM* HL;

-LB22.1:        (AL | HL | CM) CM* IN;   # The CM is from LB10, treat an unattached CM as AL.
-LB22.2:       EX CM* IN;
-LB22.3:       (ID | EB | EM) CM* IN;
-# LB22.4:       IN CM* IN;  # delete this rule for CSS loose.
-LB22.5:       NU CM* IN;
+
+LB22:        [^IN] CM* IN;   # For CSS Loose, allow breaks between adjacent ellipsis characters.

 LB23.1:      (AL | HL | CM) CM* NU;
 LB23.2:      NU CM* (AL | HL);
@ -204,13 +208,13 @@ LB28:        (AL | HL | CM)CM* (AL | HL);
 LB29:        IS CM* (AL | HL);

 # LB30  is adjusted for unattached leading CM being treated as AL.
-LB30.1:      (AL | CM | HL | NU) CM* OP;
-LB30.2:      CP CM* (AL | HL | NU);
+LB30.1:      (AL | CM | HL | NU) CM* OP30;
+LB30.2:      CP30 CM* (AL | HL | NU);

 # LB30a  keep pairs of RI together.
-LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS ZWJ]?;
+LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;

 # LB30b Do not break between Emoji Base and Emoji Modifier
 LB30b:       EB CM* EM;
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/line_loose_cj.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/line_loose_cj.txt
@ -6,16 +6,15 @@
 #  file:  line_loose_cj.txt
 #
 # Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
-# Rules derived from Unicode Standard Annex #14 Revision 40 for Unicode 11.0
+# Rules derived from Unicode Standard Annex #14 Revision 44 for Unicode 13.0
 #
 # Note: Rule syntax and the monkey test itself are still a work in progress.
 #       They are expected to change with review and the addition of support for rule tailoring.
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 34 for Unicode 8.0
-#         http://www.unicode.org/reports/tr14/
-#         tailored as noted in 2nd paragraph below..
+#         Unicode Standard Annex #14
+#         http://www.unicode.org/reports/tr14/, tailored as noted below.
 #
 #         This tailors the line break behavior to correspond to CSS
 #         line-break=loose (BCP47 -u-lb-loose) as defined for Chinese & Japanese.
@ -87,6 +86,13 @@ XX = [:LineBreak =  Unknown:];
 ZW = [:LineBreak =  ZWSpace:];
 ZWJ = [:LineBreak =  ZWJ:];

+# OP30 and CP30 are variants of OP and CP that appear in rule LB30 from UAX 14.
+# Limitations of this monkey test rule parser require that these definitions be pulled out
+# rather than appearing in-line in LB 30.
+
+OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+
 # LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
 AL = [AL AI SG XX ];
 dictionary = SA;
@ -196,11 +202,8 @@ LB21.2:      BB CM* [^CM CB];

 LB21b:       SY CM* HL;

-LB22.1:        (AL | HL | CM) CM* IN;   # The CM is from LB10, treat an unattached CM as AL.
-LB22.2:       EX CM* IN;
-LB22.3:       (ID | EB | EM) CM* IN;
-# LB22.4:       IN CM* IN;  # delete this rule for CSS loose.
-LB22.5:       NU CM* IN;
+
+LB22:        [^IN] CM* IN;   # For CSS Loose, allow breaks between adjacent ellipsis characters.

 LB23.1:      (AL | HL | CM) CM* NU;
 LB23.2:      NU CM* (AL | HL);
@ -227,13 +230,13 @@ LB28:        (AL | HL | CM)CM* (AL | HL);
 LB29:        IS CM* (AL | HL);

 # LB30  is adjusted for unattached leading CM being treated as AL.
-LB30.1:      (AL | CM | HL | NU) CM* OP;
-LB30.2:      CP CM* (AL | HL | NU);
+LB30.1:      (AL | CM | HL | NU) CM* OP30;
+LB30.2:      CP30 CM* (AL | HL | NU);

 # LB30a  keep pairs of RI together.
-LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS ZWJ]?;
+LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;

 # LB30b Do not break between Emoji Base and Emoji Modifier
 LB30b:       EB CM* EM;
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/line_normal.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/line_normal.txt
@ -6,20 +6,15 @@
 # file: line_normal.txt
 #
 # Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
-# Rules derived from Unicode Standard Annex #14 Revision 40 for Unicode 11.0
+# Rules derived from Unicode Standard Annex #14 Revision 44 for Unicode 13.0
 #
 # Note: Rule syntax and the monkey test itself are still a work in progress.
 #       They are expected to change with review and the addition of support for rule tailoring.
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 34 for Unicode 8.0
-#         http://www.unicode.org/reports/tr14/
-#         tailored as noted in 2nd paragraph below.
-#
-#         TODO:  Rule LB 8 remains as it was in Unicode 5.2
-#         This is only because of a limitation of ICU break engine implementation,
-#         not because the older behavior is desirable.
+#         Unicode Standard Annex #14
+#         http://www.unicode.org/reports/tr14/, tailored as noted below.
 #
 #         This tailors the line break behavior to correspond to CSS
 #         line-break=normal (BCP47 -u-lb-normal) as defined for languages other than
@ -75,6 +70,13 @@ XX = [:LineBreak =  Unknown:];
 ZW = [:LineBreak =  ZWSpace:];
 ZWJ = [:LineBreak =  ZWJ:];

+# OP30 and CP30 are variants of OP and CP that appear in rule LB30 from UAX 14.
+# Limitations of this monkey test rule parser require that these definitions be pulled out
+# rather than appearing in-line in LB 30.
+
+OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+
 # LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
 AL = [AL AI SG XX ];
 dictionary = SA;
@ -179,11 +181,7 @@ LB21.2:      BB CM* [^CM CB];

 LB21b:       SY CM* HL;

-LB22.1:        (AL | HL | CM) CM* IN;   # The CM is from LB10, treat an unattached CM as AL.
-LB22.2:       EX CM* IN;
-LB22.3:       (ID | EB | EM) CM* IN;
-LB22.4:       IN CM* IN;
-LB22.5:       NU CM* IN;
+LB22:        . CM* IN;

 LB23.1:      (AL | HL | CM) CM* NU;
 LB23.2:      NU CM* (AL | HL);
@ -210,13 +208,13 @@ LB28:        (AL | HL | CM)CM* (AL | HL);
 LB29:        IS CM* (AL | HL);

 # LB30  is adjusted for unattached leading CM being treated as AL.
-LB30.1:      (AL | CM | HL | NU) CM* OP;
-LB30.2:      CP CM* (AL | HL | NU);
+LB30.1:      (AL | CM | HL | NU) CM* OP30;
+LB30.2:      CP30 CM* (AL | HL | NU);

 # LB30a  keep pairs of RI together.
-LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS ZWJ]?;
+LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;

 # LB30b Do not break between Emoji Base and Emoji Modifier
 LB30b:       EB CM* EM;
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/line_normal_cj.txt
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/line_normal_cj.txt
@ -6,20 +6,15 @@
 #  file:  line_normal_cj.txt
 #
 # Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
-# Rules derived from Unicode Standard Annex #14 Revision 40 for Unicode 11.0
+# Rules derived from Unicode Standard Annex #14 Revision 44 for Unicode 13.0
 #
 # Note: Rule syntax and the monkey test itself are still a work in progress.
 #       They are expected to change with review and the addition of support for rule tailoring.
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 34 for Unicode 8.0
-#         http://www.unicode.org/reports/tr14/
-#         tailored as noted in 2nd paragraph below.
-#
-#         TODO:  Rule LB 8 remains as it was in Unicode 5.2
-#         This is only because of a limitation of ICU break engine implementation,
-#         not because the older behavior is desirable.
+#         Unicode Standard Annex #14
+#         http://www.unicode.org/reports/tr14/, tailored as noted below.
 #
 #         This tailors the line break behavior to correspond to CSS
 #         line-break=normal (BCP47 -u-lb-normal) as defined for Chinese & Japanese.
@ -78,6 +73,13 @@ XX = [:LineBreak =  Unknown:];
 ZW = [:LineBreak =  ZWSpace:];
 ZWJ = [:LineBreak =  ZWJ:];

+# OP30 and CP30 are variants of OP and CP that appear in rule LB30 from UAX 14.
+# Limitations of this monkey test rule parser require that these definitions be pulled out
+# rather than appearing in-line in LB 30.
+
+OP30 = [OP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+CP30 = [CP - [\p{ea=F}\p{ea=W}\p{ea=H}]];
+
 # LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
 AL = [AL AI SG XX ];
 dictionary = SA;
@ -188,11 +190,7 @@ LB21.2:      BB CM* [^CM CB];

 LB21b:       SY CM* HL;

-LB22.1:        (AL | HL | CM) CM* IN;   # The CM is from LB10, treat an unattached CM as AL.
-LB22.2:       EX CM* IN;
-LB22.3:       (ID | EB | EM) CM* IN;
-LB22.4:       IN CM* IN;
-LB22.5:       NU CM* IN;
+LB22:        . CM* IN;

 LB23.1:      (AL | HL | CM) CM* NU;
 LB23.2:      NU CM* (AL | HL);
@ -218,13 +216,13 @@ LB28:        (AL | HL | CM)CM* (AL | HL);
 LB29:        IS CM* (AL | HL);

 # LB30  is adjusted for unattached leading CM being treated as AL.
-LB30.1:      (AL | CM | HL | NU) CM* OP;
-LB30.2:      CP CM* (AL | HL | NU);
+LB30.1:      (AL | CM | HL | NU) CM* OP30;
+LB30.2:      CP30 CM* (AL | HL | NU);

 # LB30a  keep pairs of RI together.
-LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
-LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS ZWJ]?;
+LB30a.1:     RI CM* RI         ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.2:     RI CM* RI CM* CMS ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS IN CM];
+LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS IN ZWJ]?;

 # LB30b Do not break between Emoji Base and Emoji Modifier
 LB30b:       EB CM* EM;