2002-06-25 17:23:07 +00:00
|
|
|
#
|
2002-08-09 03:14:43 +00:00
|
|
|
# Copyright (C) 2002, International Business Machines Corporation and others.
|
|
|
|
# All Rights Reserved.
|
2002-06-25 17:23:07 +00:00
|
|
|
#
|
2002-08-09 03:14:43 +00:00
|
|
|
# file: char.txt
|
|
|
|
#
|
|
|
|
# ICU Character Break Rules, also known as Grapheme Cluster Boundaries
|
|
|
|
# See Unicode Technical Report #29.
|
|
|
|
# These rules are based on the proposed draft dated 2002-08-06
|
2002-06-25 17:23:07 +00:00
|
|
|
#
|
|
|
|
|
|
|
|
#
|
|
|
|
# Character Class Definitions.
|
|
|
|
# The names are those from TR29.
|
|
|
|
#
|
|
|
|
# Carriage return (U+000D). Referenced by the "$CR $LF" rule and by the reverse rule.
$CR = \r;
|
|
|
|
# Line feed (U+000A). Referenced by the "$CR $LF" rule and by the reverse rule.
$LF = \n;
|
2002-08-09 03:14:43 +00:00
|
|
|
# Union of four Unicode general categories: Zl (line separator), Zp (paragraph
# separator), Cc (control) and Cf (format).  Used as a rule on its own and
# negated ([^$Control]) in the main forward rule.
$Control = [[:Zl:] [:Zp:] [:Cc:] [:Cf:]];
|
2002-06-25 17:23:07 +00:00
|
|
|
|
2002-08-09 03:14:43 +00:00
|
|
|
# Note on $Extend: Earlier versions of TR29 included Mc characters.
|
|
|
|
# To avoid test breakage, Mc is still included for the time being.
|
|
|
|
# $Extend = [[:Mn:] [:Me:] \uff9e-\uff9f]; # FF9E..FF9F ; Other_Grapheme_Extend
|
|
|
|
# Active definition of $Extend: general categories Mn, Me and Mc plus the
# explicit range U+FF9E..U+FF9F.  It differs from the commented-out variant
# only by the additional [:Mc:] term.
$Extend = [[:Mn:] [:Me:] [:Mc:] \uff9e-\uff9f]; # FF9E..FF9F ; Other_Grapheme_Extend
|
2002-06-25 17:23:07 +00:00
|
|
|
|
|
|
|
#
|
2002-08-09 03:14:43 +00:00
|
|
|
# Korean Syllable Definitions
|
2002-06-25 17:23:07 +00:00
|
|
|
#
|
|
|
|
# TR29 class L: Hangul leading-consonant jamo, U+1100..U+115F.
$L = [\u1100-\u115f];
|
|
|
|
# TR29 class V: Hangul vowel jamo, U+1160..U+11A2.
$V = [\u1160-\u11a2];
|
|
|
|
# TR29 class T: Hangul trailing-consonant jamo, U+11A8..U+11F9.
$T = [\u11a8-\u11f9];
|
|
|
|
|
|
|
|
# TR29 class LV: the precomposed Hangul syllables listed explicitly below.
# The list starts at U+AC00 and advances in steps of 0x1C (28) up to U+D788,
# i.e. every 28th code point of the U+AC00..U+D7A3 block (399 entries).
# All remaining code points of that block are assigned to $LVT.
$LV = [ \uac00 \uac1c \uac38 \uac54 \uac70 \uac8c \uaca8 \uacc4 \uace0 \uacfc \uad18 \uad34 \uad50 \uad6c \uad88 \uada4
|
|
|
|
\uadc0 \uaddc \uadf8 \uae14 \uae30 \uae4c \uae68 \uae84 \uaea0 \uaebc \uaed8 \uaef4 \uaf10 \uaf2c \uaf48 \uaf64
|
|
|
|
\uaf80 \uaf9c \uafb8 \uafd4 \uaff0 \ub00c \ub028 \ub044 \ub060 \ub07c \ub098 \ub0b4 \ub0d0 \ub0ec \ub108 \ub124
|
|
|
|
\ub140 \ub15c \ub178 \ub194 \ub1b0 \ub1cc \ub1e8 \ub204 \ub220 \ub23c \ub258 \ub274 \ub290 \ub2ac \ub2c8 \ub2e4
|
|
|
|
\ub300 \ub31c \ub338 \ub354 \ub370 \ub38c \ub3a8 \ub3c4 \ub3e0 \ub3fc \ub418 \ub434 \ub450 \ub46c \ub488 \ub4a4
|
|
|
|
\ub4c0 \ub4dc \ub4f8 \ub514 \ub530 \ub54c \ub568 \ub584 \ub5a0 \ub5bc \ub5d8 \ub5f4 \ub610 \ub62c \ub648 \ub664
|
|
|
|
\ub680 \ub69c \ub6b8 \ub6d4 \ub6f0 \ub70c \ub728 \ub744 \ub760 \ub77c \ub798 \ub7b4 \ub7d0 \ub7ec \ub808 \ub824
|
|
|
|
\ub840 \ub85c \ub878 \ub894 \ub8b0 \ub8cc \ub8e8 \ub904 \ub920 \ub93c \ub958 \ub974 \ub990 \ub9ac \ub9c8 \ub9e4
|
|
|
|
\uba00 \uba1c \uba38 \uba54 \uba70 \uba8c \ubaa8 \ubac4 \ubae0 \ubafc \ubb18 \ubb34 \ubb50 \ubb6c \ubb88 \ubba4
|
|
|
|
\ubbc0 \ubbdc \ubbf8 \ubc14 \ubc30 \ubc4c \ubc68 \ubc84 \ubca0 \ubcbc \ubcd8 \ubcf4 \ubd10 \ubd2c \ubd48 \ubd64
|
|
|
|
\ubd80 \ubd9c \ubdb8 \ubdd4 \ubdf0 \ube0c \ube28 \ube44 \ube60 \ube7c \ube98 \ubeb4 \ubed0 \ubeec \ubf08 \ubf24
|
|
|
|
\ubf40 \ubf5c \ubf78 \ubf94 \ubfb0 \ubfcc \ubfe8 \uc004 \uc020 \uc03c \uc058 \uc074 \uc090 \uc0ac \uc0c8 \uc0e4
|
|
|
|
\uc100 \uc11c \uc138 \uc154 \uc170 \uc18c \uc1a8 \uc1c4 \uc1e0 \uc1fc \uc218 \uc234 \uc250 \uc26c \uc288 \uc2a4
|
|
|
|
\uc2c0 \uc2dc \uc2f8 \uc314 \uc330 \uc34c \uc368 \uc384 \uc3a0 \uc3bc \uc3d8 \uc3f4 \uc410 \uc42c \uc448 \uc464
|
|
|
|
\uc480 \uc49c \uc4b8 \uc4d4 \uc4f0 \uc50c \uc528 \uc544 \uc560 \uc57c \uc598 \uc5b4 \uc5d0 \uc5ec \uc608 \uc624
|
|
|
|
\uc640 \uc65c \uc678 \uc694 \uc6b0 \uc6cc \uc6e8 \uc704 \uc720 \uc73c \uc758 \uc774 \uc790 \uc7ac \uc7c8 \uc7e4
|
|
|
|
\uc800 \uc81c \uc838 \uc854 \uc870 \uc88c \uc8a8 \uc8c4 \uc8e0 \uc8fc \uc918 \uc934 \uc950 \uc96c \uc988 \uc9a4
|
|
|
|
\uc9c0 \uc9dc \uc9f8 \uca14 \uca30 \uca4c \uca68 \uca84 \ucaa0 \ucabc \ucad8 \ucaf4 \ucb10 \ucb2c \ucb48 \ucb64
|
|
|
|
\ucb80 \ucb9c \ucbb8 \ucbd4 \ucbf0 \ucc0c \ucc28 \ucc44 \ucc60 \ucc7c \ucc98 \uccb4 \uccd0 \uccec \ucd08 \ucd24
|
|
|
|
\ucd40 \ucd5c \ucd78 \ucd94 \ucdb0 \ucdcc \ucde8 \uce04 \uce20 \uce3c \uce58 \uce74 \uce90 \uceac \ucec8 \ucee4
|
|
|
|
\ucf00 \ucf1c \ucf38 \ucf54 \ucf70 \ucf8c \ucfa8 \ucfc4 \ucfe0 \ucffc \ud018 \ud034 \ud050 \ud06c \ud088 \ud0a4
|
|
|
|
\ud0c0 \ud0dc \ud0f8 \ud114 \ud130 \ud14c \ud168 \ud184 \ud1a0 \ud1bc \ud1d8 \ud1f4 \ud210 \ud22c \ud248 \ud264
|
|
|
|
\ud280 \ud29c \ud2b8 \ud2d4 \ud2f0 \ud30c \ud328 \ud344 \ud360 \ud37c \ud398 \ud3b4 \ud3d0 \ud3ec \ud408 \ud424
|
|
|
|
\ud440 \ud45c \ud478 \ud494 \ud4b0 \ud4cc \ud4e8 \ud504 \ud520 \ud53c \ud558 \ud574 \ud590 \ud5ac \ud5c8 \ud5e4
|
|
|
|
\ud600 \ud61c \ud638 \ud654 \ud670 \ud68c \ud6a8 \ud6c4 \ud6e0 \ud6fc \ud718 \ud734 \ud750 \ud76c \ud788 ];
|
|
|
|
# TR29 class LVT: every code point in U+AC00..U+D7A3 that is not in $LV
# (set difference).
$LVT = [[\uac00-\ud7a3] - $LV];
|
|
|
|
|
2002-08-09 03:14:43 +00:00
|
|
|
# Rule: a single $Control character matches on its own.  The general rule
# further down excludes these characters via [^$Control], and this rule takes
# no trailing $Extend.
$Control;
|
2002-06-25 17:23:07 +00:00
|
|
|
# Rule: CR immediately followed by LF is matched as one unit.
# NOTE(review): CR and LF are both Cc characters and so also match the
# single-character $Control rule; presumably the longer match wins so the
# pair is not split -- confirm against the rule compiler's semantics.
$CR $LF;
|
2002-08-09 03:14:43 +00:00
|
|
|
# Main forward rule.  A base followed by zero or more $Extend characters,
# where the base is one of:
#   [^$Control]                           any single non-control character
#   $L+                                   a run of leading jamo
#   $T+                                   a run of trailing jamo
#   $L* ($LV? $V+ | $LV | $LVT) $T*       a Hangul syllable sequence: optional
#                                         leading jamo, then vowels (optionally
#                                         preceded by an LV syllable) or a
#                                         precomposed LV/LVT syllable, then
#                                         optional trailing jamo
([^$Control] | $L+ | $T+ | $L* ($LV? $V+ | $LV | $LVT) $T*) $Extend*;
|
2002-06-25 17:23:07 +00:00
|
|
|
|
|
|
|
|
|
|
|
#
|
2002-08-09 03:14:43 +00:00
|
|
|
# Reverse Rule, back up to the beginning of some preceding grapheme cluster.
|
2002-06-25 17:23:07 +00:00
|
|
|
#
|
2002-08-09 03:14:43 +00:00
|
|
|
# Reverse rule (marked by the leading '!').  It first consumes any number of
# $Extend, $V or $T characters, then optionally one of: the pair LF CR, a
# sequence of LV/LVT syllables followed by leading jamo, or any single
# character.
# NOTE(review): the operands appear in the opposite order from the forward
# rules (LF before CR, $T/$V before $LV before $L), presumably because the
# reverse rule is matched while moving backward through the text -- confirm.
! ($Extend | $V | $T )* ($LF $CR | ($LV | $LVT)*$L* | .)?;
|