2016-06-30 23:41:56 +00:00
|
|
|
# © 2016 and later: Unicode, Inc. and others.
|
2020-09-12 23:48:36 +00:00
|
|
|
# License & terms of use: http://www.unicode.org/copyright.html
|
2020-09-01 06:22:03 +00:00
|
|
|
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
|
2016-09-19 05:09:40 +00:00
|
|
|
#
|
2006-07-21 01:08:32 +00:00
|
|
|
# File: Han_Spacedhan.txt
|
2016-09-19 05:09:40 +00:00
|
|
|
# Generated from CLDR
|
2006-07-21 01:08:32 +00:00
|
|
|
#
|
2016-02-05 03:37:50 +00:00
|
|
|
|
|
|
|
# Only intended for internal use
|
|
|
|
# Make sure Han characters are normalized, including compatibility characters (e.g. parenthesized or circled forms) that contain them.
|
|
|
|
# The first set in the filter is computed with http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:tonfkd:/XXX/:]-[:ideographic:]-[:sc=han:]
|
|
|
|
# Where XXX is the resolved [:ideographic:][:sc=han:]. It needs updating with each Unicode release!
|
|
|
|
# Filtered transform rule: NFKC is applied only to characters matched by the bracketed set,
# i.e. the explicitly listed ranges plus everything with the Ideographic property or Han script.
:: [[㆒-㆟㈠-㉇㊀-㊰㋀-㋋㍘-㍰㍻-㍿㏠-㏾ 🈐-🈒🈔-🈺🉀-🉈🉐🉑][:ideographic:][:sc=han:]] nfkc;
|
2004-08-02 20:06:55 +00:00
|
|
|
# Run the fullwidth-halfwidth transform over the text before the spacing rules below.
:: fullwidth-halfwidth;
|
2009-04-10 07:47:09 +00:00
|
|
|
# Forward-only rule (→): rewrite the ideographic full stop as an ASCII period.
# The replacement is quoted so that the period is taken as a literal character.
。 → '.';
|
2004-08-02 20:06:55 +00:00
|
|
|
# Set of punctuation that ends a phrase: sentence/clause punctuation, the ideographic
# full stop and comma, and all closing (Pe) and final-quote (Pf) punctuation.
# NOTE(review): as written, the unescaped . , : ? ! and ; repeat the escaped ASCII entries;
# presumably these were fullwidth forms in the upstream data — confirm against CLDR.
$terminalPunct = [\.\,\:\;\?\!.,:?!。、;[:Pe:][:Pf:]];
|
|
|
|
# Set of punctuation that begins a phrase: opening (Ps) and initial-quote (Pi) punctuation.
# Wrapped in an outer [...] so the variable is a single set (the union of both categories)
# wherever it is referenced, matching how $terminalPunct is defined, instead of two
# adjacent sets that only act as a union when spliced inside another set.
$initialPunct = [[:Ps:][:Pi:]];
|
2016-02-05 03:37:50 +00:00
|
|
|
# add space between any Han or terminal punctuation and letters, and
|
|
|
|
# between letters and Han or initial punct
|
2009-04-10 07:47:09 +00:00
|
|
|
# Forward-only rule: the empty match {} sits between a preceding ideograph or terminal
# punctuation mark and a following letter, and is replaced by a single space.
# Han ideographs are themselves letters, so this also separates consecutive ideographs.
[[:Ideographic:] $terminalPunct] {} [:Letter:] → ' ' ;
|
|
|
|
# Forward-only rule: insert a space after a letter (and any marks that follow it) when the
# next character is an ideograph or initial punctuation. [:Mark:]* keeps the inserted
# space after the marks rather than between the letter and its marks.
[:Letter:] [:Mark:]* {} [[:Ideographic:] $initialPunct] → ' ' ;
|
2016-02-05 03:37:50 +00:00
|
|
|
# remove spacing between ideographs and other letters
|
2009-04-10 07:47:09 +00:00
|
|
|
# Reverse-only rule (←): a space that sits between an ideograph and a following letter
# is replaced by nothing, i.e. removed.
← [:Ideographic:] { ' ' } [:Letter:] ;
|
|
|
|
# Reverse-only rule (←): a space that follows a letter (and any marks after it) and
# precedes an ideograph is replaced by nothing, i.e. removed.
← [:Letter:] [:Mark:]* { ' ' } [:Ideographic:] ;
|
2016-02-05 03:37:50 +00:00
|
|
|
|