diff --git a/icu4c/source/data/brkitr/char.txt b/icu4c/source/data/brkitr/char.txt index 05d6992e43..04272b3276 100644 --- a/icu4c/source/data/brkitr/char.txt +++ b/icu4c/source/data/brkitr/char.txt @@ -17,7 +17,7 @@ $CR = \r; $LF = \n; $Control = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]]; -$Extend = [[:Grapheme_Extend = TRUE:]]; +$Extend = [[:Grapheme_Extend = TRUE:] - [$Control]]; # # Korean Syllable Definitions diff --git a/icu4c/source/data/brkitr/word.txt b/icu4c/source/data/brkitr/word.txt index 27a3219f10..aaea0506db 100644 --- a/icu4c/source/data/brkitr/word.txt +++ b/icu4c/source/data/brkitr/word.txt @@ -54,7 +54,7 @@ $Numeric = [:LineBreak = Numeric:]; $CR = \u000d; $LF = \u000a; $Control = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]]; -$Extend = [[:Grapheme_Extend = TRUE:]]; +$Extend = [[:Grapheme_Extend = TRUE:] - $Control]; $Format = [[:Cf:]]; $Hiragana = [:Hiragana:]; $Ideographic = [:IDEOGRAPHIC:]; diff --git a/icu4c/source/i18n/regexst.cpp b/icu4c/source/i18n/regexst.cpp index ca5456e74f..4a14f97551 100644 --- a/icu4c/source/i18n/regexst.cpp +++ b/icu4c/source/i18n/regexst.cpp @@ -114,8 +114,8 @@ static const UChar gIsWordPattern[] = { static const UChar gGC_ExtendPattern[] = { // [ \ p { G r a p h e m e _ 0x5b, 0x5c, 0x70, 0x7b, 0x47, 0x72, 0x61, 0x70, 0x68, 0x65, 0x6d, 0x65, 0x5f, -// E x t e n d } ] - 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x7d, 0x5d, 0}; +// E x t e n d } - \ p { C f } ] + 0x45, 0x78, 0x74, 0x65, 0x6e, 0x64, 0x7d, 0x2d, 0x5c, 0x70, 0x7b, 0x43, 0x66, 0x7d, 0x5d, 0}; static const UChar gGC_LPattern[] = { // [ \ p { H a n g u l _ S y l