scuffed-code/tools/unicodetools/com/ibm/text/UCD/UnicodeInvariants.txt

# Invariance tests
# Each line indicates an invariant set relationship to be tested,
# and is of the form:
#
# 	line := set relation set
#
#   relation := '='             // has identical contents to
#            := ('>' | '⊃')    // is proper superset of
#            := ('≥' | '⊇')    // is superset of
#            := ('<' | '⊂')    // is proper subset of
#            := ('≤' | '⊆')    // is subset of
#            := '!'             // has no intersection
#            := '?'             // none of the above (they overlap, and neither contains the other)
#
# A set is a standard UnicodeSet, but where $pv can be used to express properties
#
# 	pv := '$' '×'? prop (('=' | ':') value)?
#
# The × indicates that the property is the previous released version.
#  That is, if the version is 4.0.1, then the × version is 4.0.0
# If the value is missing, it is defaulted to true
# If the value is of the form «...», then the ... is interpreted as a regular expression
# The property can be the short or long form as in the PropertyAliases.txt
# The value (if enumerated) can be the short or long form as in PropertyValueAliases.txt
#
# A UnicodeSet is a boolean combinations of properties and character ranges, as you would see in
#  Perl or other regular-expression languages. Examples:
#	[$General_Category:Unassigned-[a-zA-Z]]
# For details, see http://oss.software.ibm.com/icu/userguide/unicodeSet.html
#
# WARNING: do not use \p{...} or [:...:] syntax, since those will be
# ICU's current version of properties, not the current snapshot's.
# Use the $ notation for properties (listed above) instead.
#
# When this file is parsed, an error message may contain <@>
#  to indicate the location of an error in the input line.

# The following not very interesting, but show examples of use

#$GC:Zs ! $GC:Zp
#$East_Asian_Width:Neutral ? $GC:Uppercase_Letter
$GC:Zs ? $Name:«.*SPACE.*»

[$script:greek&$gc:«.*letter.*»] = [;\u00B5\u00B7\u00C4\u00CB\u00CF\u00D6\u00DC\u00E4\u00EB\u00EF\u00F6\u00FC\u00FF-\u0101\u0112-\u0113\u012A-\u012B\u014C-\u014D\u016A-\u016B\u0178\u01D5-\u01DC\u01DE-\u01E3\u01EC-\u01ED\u022A-\u022D\u0230-\u0233\u0304\u0308\u0313-\u0314\u0342-\u0345\u037A\u037E\u0386-\u038A\u038C\u038E-\u03A1\u03A3-\u03CE\u03D0-\u03D7\u03DB\u03DD\u03DF\u03E1\u03E3\u03E5\u03E7\u03E9\u03EB\u03ED\u03EF-\u03F5\u0401\u0407\u0451\u0457\u04D2-\u04D3\u04DA-\u04DF\u04E2-\u04E7\u04EA-\u04F1\u04F4-\u04F5\u04F8-\u04F9\u1E14-\u1E17\u1E20-\u1E21\u1E26-\u1E27\u1E2E-\u1E2F\u1E38-\u1E39\u1E4E-\u1E53\u1E5C-\u1E5D\u1E7A-\u1E7B\u1E84-\u1E85\u1E8C-\u1E8D\u1E97\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC1-\u1FC4\u1FC6-\u1FCC\u1FCF-\u1FD3\u1FD6-\u1FDB\u1FDF-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126]

# Examples of parsing errors

# $LBA:Neutral =  $GC:Zp # example of non-existant property
# $LB:foo =  $GC:Zp # example of non-existant value
# $GC:Zs @ $GC:Zp # example of unknown relation

# The following should be real invariants
# For illustration, different alias styles are used

$Line_Break:Unknown = [$General_Category:Unassigned $GeneralCategory:PrivateUse]
$LB:OP = $GC:Ps
$General_Category:Decimal_Number = $Numeric_Type:Decimal
$Whitespace ⊃ [$GC:Zs $GC:Zp $GC:Zl]

# Comparisons across versions

$ID_Start ⊇ $×ID_Start
$ID_Continue ⊇ $×ID_Continue

#$age:4.0.1 = $age4.0.0

# Derivations

$Math = [$GC:Sm $Other_Math]
$Alphabetic = [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl $Other_Alphabetic]
$Lowercase = [$GC:Ll $Other_Lowercase]
$Uppercase = [$GC:Lu $Other_Uppercase]
$ID_Start = [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl $Other_ID_Start]
$ID_Continue = [$ID_Start $GC:Mn $GC:Mc $GC:Nd $GC:Pc]
$Default_Ignorable_Code_Point = [[$Other_Default_Ignorable_Code_Point $GC:Cf $GC:Cc $GC:Cs $Variation_Selector $Noncharacter_Code_Point] - [$White_Space\uFFF9-\uFFFB]]
$Grapheme_Extend = [$GC:Me $GC:Mn $Other_Grapheme_Extend]
$Grapheme_Base = [^$GC:Cc $GC:Cf $GC:Cs $GC:Co $GC:Cn $GC:Zl $GC:Zp $Grapheme_Extend]

# "Minimal" Other_: NOT hard requirements; just if we want to be minimal

$Other_Math = [$Math - $GC:Sm]
$Other_Alphabetic = [$Alphabetic - [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl]]
$Other_Lowercase = [$Lowercase - $GC:Ll]
$Other_Uppercase = [$Uppercase - $GC:Lu]
$Other_ID_Start = [$ID_Start - [$GC:Lu $GC:Ll $GC:Lt $GC:Lm $GC:Lo $GC:Nl]]
$Other_Default_Ignorable_Code_Point = [$Default_Ignorable_Code_Point - [[$GC:Cf $GC:Cc $GC:Cs $Variation_Selector $Noncharacter_Code_Point] - [$White_Space\uFFF9-\uFFFB]]]
$Other_Grapheme_Extend = [$Grapheme_Extend - [$GC:Me $GC:Mn]]

# Testing
$script:greek = $×script:greek
$gc:lm = $script:inherited