ICU-4199 enum/name API support for C/POSIX character classes, and UnicodeSet support for [:Assigned:]

X-SVN-Rev: 17730
This commit is contained in:
Markus Scherer 2005-05-28 22:54:36 +00:00
parent 291516499b
commit e6a0df52ee
11 changed files with 1299 additions and 1175 deletions

View File

@ -504,7 +504,7 @@ u_isUAlphabetic(UChar32 c) {
return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0;
}
/* Checks if ch is a letter or a decimal digit */
/* Checks if c is a letter or a decimal digit */
U_CAPI UBool U_EXPORT2
u_isalnum(UChar32 c) {
uint32_t props;
@ -512,6 +512,15 @@ u_isalnum(UChar32 c) {
return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0);
}
/**
 * C/POSIX "alnum" test: true if c is alphabetic or a decimal digit.
 * Implements the UCHAR_POSIX_ALNUM binary property.
 * @internal
 */
U_CFUNC UBool
u_isalnumPOSIX(UChar32 c) {
    /*
     * De Morgan form of (alphabetic || digit): c fails only if it is
     * neither. u_isdigit() is still skipped when c is alphabetic.
     */
    return (UBool)!(!u_isUAlphabetic(c) && !u_isdigit(c));
}
/* Checks if c is a Unicode character with an assigned character type. */
U_CAPI UBool U_EXPORT2
u_isdefined(UChar32 c) {
@ -577,8 +586,10 @@ u_isblank(UChar32 c) {
if((uint32_t)c<=0x9f) {
return c==9 || c==0x20; /* TAB or SPACE */
} else {
/* White_Space but not LS (Zl) or PS (Zp) */
return u_isUWhiteSpace(c) && ((c&0xfffffffe)!=0x2028);
/* Zs */
uint32_t props;
GET_PROPS(c, props);
return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR);
}
}
@ -596,6 +607,22 @@ u_isprint(UChar32 c) {
return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0);
}
/**
 * C/POSIX "print" test: checks if c is in \p{graph}\p{blank} - \p{cntrl}.
 * Implements the UCHAR_POSIX_PRINT binary property.
 * @internal
 */
U_CFUNC UBool
u_isprintPOSIX(UChar32 c) {
    uint32_t props;
    UBool isSpaceSeparator;

    GET_PROPS(c, props);
    /*
     * TAB (from blank) is the only cntrl character in graph+blank.
     * Removing it from blank leaves exactly Zs, so test the general
     * category directly rather than calling u_isblank().
     */
    isSpaceSeparator=(UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR);
    /* u_isgraphPOSIX() is consulted only when c is not a space separator */
    return (UBool)(isSpaceSeparator || u_isgraphPOSIX(c));
}
U_CAPI UBool U_EXPORT2
u_isgraph(UChar32 c) {
uint32_t props;
@ -606,6 +633,24 @@ u_isgraph(UChar32 c) {
==0);
}
/**
 * C/POSIX "graph" test: checks if c is in
 * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
 * with space=\p{Whitespace} and Control=Cc.
 * Implements the UCHAR_POSIX_GRAPH binary property.
 * @internal
 */
U_CFUNC UBool
u_isgraphPOSIX(UChar32 c) {
    /*
     * General categories that are not graphic.
     * \p{space}\p{gc=Control} == \p{gc=Z}\p{Control}, so the whole
     * Z group stands in for the whitespace part of the exclusion.
     */
    const uint32_t nonGraphMask=
        U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK;
    uint32_t props;

    GET_PROPS(c, props);
    /* c is graphic exactly when its category bit is outside the mask */
    return (UBool)!(CAT_MASK(props)&nonGraphMask);
}
U_CAPI UBool U_EXPORT2
u_ispunct(UChar32 c) {
uint32_t props;
@ -1003,9 +1048,11 @@ uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
/* add code points with hardcoded properties, plus the ones following them */
/* add for u_isblank() */
USET_ADD_CP_AND_NEXT(sa, TAB);
/* add for IS_THAT_CONTROL_SPACE() */
sa->add(sa->set, TAB); /* range TAB..CR */
sa->add(sa->set, CR+1);
sa->add(sa->set, CR+1); /* range TAB..CR */
sa->add(sa->set, 0x1c);
sa->add(sa->set, 0x1f+1);
USET_ADD_CP_AND_NEXT(sa, NL);

View File

@ -77,12 +77,31 @@ U_CDECL_BEGIN
* (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
* Another example: There is no "istitle()" class for titlecase characters.
*
* A summary of the behavior of some C/POSIX character classification implementations
* for Unicode is available at http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/posix_classes.html
* ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
* ICU implements them according to the Standard Recommendations in
* Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
* (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
*
* <strong>Important</strong>:
* The behavior of the ICU C/POSIX-style character classification
* functions is subject to change according to discussion of the above summary.
* API access for C/POSIX character classes is as follows:
* - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC)
* - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE)
* - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE)
* - punct: u_ispunct(c)
* - digit: u_charType(c)==U_DECIMAL_DIGIT_NUMBER
* - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT)
* - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM)
* - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)
* - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK)
* - cntrl: u_charType(c)==U_CONTROL_CHAR
* - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH)
* - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT)
*
* Note: Some of the u_isxyz() functions in uchar.h predate, and do not match,
* the Standard Recommendations in UTS #18. Instead, they match Java
* functions according to their API documentation.
*
* The C/POSIX character classes are also available in UnicodeSet patterns,
* using patterns like [:graph:] or \p{graph}.
*
* Note: There are several ICU whitespace functions.
* Comparison:
@ -368,6 +387,31 @@ typedef enum UProperty {
(http://www.unicode.org/reports/tr31/)
@draft ICU 3.4 */
UCHAR_PATTERN_WHITE_SPACE,
/** Binary property alnum (a C/POSIX character class).
Implemented according to the UTS #18 Annex C Standard Recommendation.
See the uchar.h file documentation.
@draft ICU 3.4 */
UCHAR_POSIX_ALNUM,
/** Binary property blank (a C/POSIX character class).
Implemented according to the UTS #18 Annex C Standard Recommendation.
See the uchar.h file documentation.
@draft ICU 3.4 */
UCHAR_POSIX_BLANK,
/** Binary property graph (a C/POSIX character class).
Implemented according to the UTS #18 Annex C Standard Recommendation.
See the uchar.h file documentation.
@draft ICU 3.4 */
UCHAR_POSIX_GRAPH,
/** Binary property print (a C/POSIX character class).
Implemented according to the UTS #18 Annex C Standard Recommendation.
See the uchar.h file documentation.
@draft ICU 3.4 */
UCHAR_POSIX_PRINT,
/** Binary property xdigit (a C/POSIX character class).
Implemented according to the UTS #18 Annex C Standard Recommendation.
See the uchar.h file documentation.
@draft ICU 3.4 */
UCHAR_POSIX_XDIGIT,
/** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
UCHAR_BINARY_LIMIT,
@ -1739,7 +1783,6 @@ u_getNumericValue(UChar32 c);
* @see UCHAR_LOWERCASE
* @see u_isupper
* @see u_istitle
* @see u_islower
* @stable ICU 2.0
*/
U_STABLE UBool U_EXPORT2

View File

@ -569,7 +569,8 @@ public:
* correspond to the following sets:
*
* "ANY" = [\\u0000-\\U0010FFFF],
* "ASCII" = [\\u0000-\\u007F].
* "ASCII" = [\\u0000-\\u007F],
* "Assigned" = [:^Cn:].
*
* @param value a value alias, either short or long. The name is matched
* loosely. See PropertyValueAliases.txt for names and a description of

View File

@ -265,7 +265,8 @@ uset_applyIntPropertyValue(USet* set,
* matched loosely and correspond to the following sets:
*
* "ANY" = [\\u0000-\\U0010FFFF],
* "ASCII" = [\\u0000-\\u007F].
* "ASCII" = [\\u0000-\\u007F],
* "Assigned" = [:^Cn:].
*
* @param propLength the length of the prop, or -1 if NULL
*

View File

@ -77,42 +77,12 @@ static const UChar HYPHEN_RIGHT_BRACE[] = {HYPHEN,SET_CLOSE,0}; /*-]*/
// Special property set IDs
static const char ANY[] = "ANY"; // [\u0000-\U0010FFFF]
static const char ASCII[] = "ASCII"; // [\u0000-\u007F]
static const char ASSIGNED[] = "Assigned"; // [:^Cn:]
// Unicode name property alias
#define NAME_PROP "na"
#define NAME_PROP_LENGTH 2
// TODO: Remove the following special-case code when
// these four C99-compatibility properties are implemented
// as enums/names.
U_CDECL_BEGIN
typedef UBool (U_CALLCONV *C99_Property_Function)(UChar32);
U_CDECL_END
static const struct C99_Map {
const char* name;
C99_Property_Function func;
UPropertySource src;
} C99_DISPATCH[] = {
// These three entries omitted; they clash with PropertyAliases
// names for Unicode properties, so UnicodeSet already maps them
// to those properties.
//{ "alpha", u_isalpha, UPROPS_SRC_PROPSVEC },
//{ "lower", u_islower, UPROPS_SRC_CASE },
//{ "upper", u_isupper, UPROPS_SRC_CASE },
// MUST be in SORTED order
{ "alnum", u_isalnum, UPROPS_SRC_CHAR },
{ "blank", u_isblank, UPROPS_SRC_PROPSVEC },
// new alias in Unicode 4.1 { "cntrl", u_iscntrl, UPROPS_SRC_CHAR },
// new alias in Unicode 4.1 { "digit", u_isdigit, UPROPS_SRC_CHAR },
{ "graph", u_isgraph, UPROPS_SRC_CHAR },
{ "print", u_isprint, UPROPS_SRC_CHAR },
// new alias in Unicode 4.1 { "punct", u_ispunct, UPROPS_SRC_CHAR },
// new alias in Unicode 4.1 { "space", u_isspace, UPROPS_SRC_CHAR },
{ "title", u_istitle, UPROPS_SRC_CHAR },
{ "xdigit", u_isxdigit, UPROPS_SRC_CHAR }
};
// TEMPORARY: Remove when deprecated category code constructor is removed.
static const UChar CATEGORY_NAMES[] = {
// Must be kept in sync with uchar.h/UCharCategory
@ -931,14 +901,6 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) {
#define FAIL(ec) {ec=U_ILLEGAL_ARGUMENT_ERROR; return *this;}
// TODO: Remove the following special-case code when
// these four C99-compatibility properties are implemented
// as enums/names.
static UBool c99Filter(UChar32 ch, void* context) {
struct C99_Map* m = (struct C99_Map*) context;
return m->func(ch);
}
UnicodeSet&
UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) {
if (U_FAILURE(ec)) return *this;
@ -974,7 +936,7 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
UProperty p;
int32_t v;
UBool mustNotBeEmpty = FALSE;
UBool mustNotBeEmpty = FALSE, invert = FALSE;
if (value.length() > 0) {
p = u_getPropertyEnum(pname);
@ -1081,22 +1043,12 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
} else if (0 == uprv_comparePropertyNames(ASCII, pname)) {
set(0, 0x7F);
return *this;
} else if (0 == uprv_comparePropertyNames(ASSIGNED, pname)) {
// [:Assigned:]=[:^Cn:]
p = UCHAR_GENERAL_CATEGORY_MASK;
v = U_GC_CN_MASK;
invert = TRUE;
} else {
// TODO: Remove the following special-case code when
// these four C99-compatibility properties are implemented
// as enums/names.
for (int32_t i=0; i<LENGTHOF(C99_DISPATCH); ++i) {
int32_t c = uprv_comparePropertyNames(pname, C99_DISPATCH[i].name);
if (c == 0) {
applyFilter(c99Filter, (void*) &C99_DISPATCH[i], C99_DISPATCH[i].src, ec);
return *this;
} else if (c < 0) {
// Further entries will not match; bail out
break;
}
}
FAIL(ec);
}
}
@ -1104,6 +1056,9 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop,
}
applyIntPropertyValue(p, v, ec);
if(invert) {
complement();
}
if (U_SUCCESS(ec) && (mustNotBeEmpty && isEmpty())) {
// mustNotBeEmpty is set to true if an empty set indicates
@ -1342,6 +1297,10 @@ const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) {
case UPROPS_SRC_PROPSVEC:
upropsvec_addPropertyStarts(&sa, &status);
break;
case UPROPS_SRC_CHAR_AND_PROPSVEC:
uchar_addPropertyStarts(&sa, &status);
upropsvec_addPropertyStarts(&sa, &status);
break;
case UPROPS_SRC_HST:
uhst_addPropertyStarts(&sa, &status);
break;

View File

@ -239,7 +239,12 @@ static const struct {
{ UPROPS_SRC_NORM, 0 }, /* UCHAR_NFKC_INERT */
{ UPROPS_SRC_NORM, 0 }, /* UCHAR_SEGMENT_STARTER */
{ 2, U_MASK(UPROPS_V2_PATTERN_SYNTAX) },
{ 2, U_MASK(UPROPS_V2_PATTERN_WHITE_SPACE) }
{ 2, U_MASK(UPROPS_V2_PATTERN_WHITE_SPACE) },
{ UPROPS_SRC_CHAR_AND_PROPSVEC, 0 }, /* UCHAR_POSIX_ALNUM */
{ UPROPS_SRC_CHAR, 0 }, /* UCHAR_POSIX_BLANK */
{ UPROPS_SRC_CHAR, 0 }, /* UCHAR_POSIX_GRAPH */
{ UPROPS_SRC_CHAR, 0 }, /* UCHAR_POSIX_PRINT */
{ UPROPS_SRC_CHAR, 0 } /* UCHAR_POSIX_XDIGIT */
};
U_CAPI UBool U_EXPORT2
@ -305,6 +310,26 @@ u_hasBinaryProperty(UChar32 c, UProperty which) {
default:
break;
}
} else if(column==UPROPS_SRC_CHAR) {
switch(which) {
case UCHAR_POSIX_BLANK:
return u_isblank(c);
case UCHAR_POSIX_GRAPH:
return u_isgraphPOSIX(c);
case UCHAR_POSIX_PRINT:
return u_isprintPOSIX(c);
case UCHAR_POSIX_XDIGIT:
return u_isxdigit(c);
default:
break;
}
} else if(column==UPROPS_SRC_CHAR_AND_PROPSVEC) {
switch(which) {
case UCHAR_POSIX_ALNUM:
return u_isalnumPOSIX(c);
default:
break;
}
}
}
}

View File

@ -224,6 +224,31 @@ uprv_getMaxValues(int32_t column);
U_CFUNC UHangulSyllableType
uchar_getHST(UChar32 c);
/**
* Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM.
* @internal
*/
U_CFUNC UBool
u_isalnumPOSIX(UChar32 c);
/**
* Checks if c is in
* [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
* with space=\p{Whitespace} and Control=Cc.
* Implements UCHAR_POSIX_GRAPH.
* @internal
*/
U_CFUNC UBool
u_isgraphPOSIX(UChar32 c);
/**
* Checks if c is in \p{graph}\p{blank} - \p{cntrl}.
* Implements UCHAR_POSIX_PRINT.
* @internal
*/
U_CFUNC UBool
u_isprintPOSIX(UChar32 c);
/** Turn a bit index into a bit flag. @internal */
#define FLAG(n) ((uint32_t)1<<(n))
@ -359,6 +384,8 @@ enum UPropertySource {
UPROPS_SRC_CASE,
/** From ubidi_props.c/ubidi.icu */
UPROPS_SRC_BIDI,
/** From uchar.c/uprops.icu main trie as well as properties vectors trie */
UPROPS_SRC_CHAR_AND_PROPSVEC,
/** One more than the highest UPropertySource (UPROPS_SRC_) constant. */
UPROPS_SRC_COUNT
};

View File

@ -889,6 +889,9 @@ void UnicodeSetTest::TestPropertySet() {
"\\u0F73\\u0F75\\u0F81",
"abcd\\u0300\\u0301\\u00c0\\u00c5",
"[:Assigned:]",
"A\\uE000\\uF8FF\\uFDC7\\U00010000\\U0010FFFD",
"\\u0888\\uFDD3\\uFFFE\\U00050005"
};
static const int32_t DATA_LEN = sizeof(DATA)/sizeof(DATA[0]);
@ -946,24 +949,20 @@ void UnicodeSetTest::TestPosixClasses() {
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
UVersionInfo ICU_34 = {3, 4, 0, 0}; // Time Bomb for bug 4199
{
if (isICUVersionAtLeast(ICU_34)) { // Time Bomb Test
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:alnum:]", status);
UnicodeSet s2("[\\p{Alphabetic}\\p{DecimalNumber}]", status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:alnum:]", status);
UnicodeSet s2("[\\p{Alphabetic}\\p{DecimalNumber}]", status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
{
if (isICUVersionAtLeast(ICU_34)) { // Time Bomb Test
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:space:]", status);
UnicodeSet s2("\\p{Whitespace}", status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
} }
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:space:]", status);
UnicodeSet s2("\\p{Whitespace}", status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
{
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:blank:]", status);
@ -974,39 +973,29 @@ void UnicodeSetTest::TestPosixClasses() {
TEST_ASSERT(s1==s2);
}
{
if (isICUVersionAtLeast(ICU_34)) { // Time Bomb Test
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:cntrl:]", status);
TEST_ASSERT_SUCCESS(status);
UnicodeSet s2("\\p{Control}", status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:cntrl:]", status);
TEST_ASSERT_SUCCESS(status);
UnicodeSet s2("\\p{Control}", status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
{
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:graph:]", status);
TEST_ASSERT_SUCCESS(status);
UnicodeSet s2("[^\\p{Whitespace}\\p{Control}\\p{Format}"
"\\p{Surrogate}\\p{Unassigned}]", status);
UnicodeSet s2("[^\\p{Whitespace}\\p{Control}\\p{Surrogate}\\p{Unassigned}]", status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
{
if (isICUVersionAtLeast(ICU_34)) { // Time Bomb Test
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:print:]", status);
TEST_ASSERT_SUCCESS(status);
UnicodeSet s2(
"[[^\\p{Whitespace}\\p{Control}\\p{Format}\\p{Surrogate}\\p{Unassigned}]"
"[\\p{Whitespace}-[\\u000a\\u000B\\u000c\\u000d\\u0085\\p{LineSeparator}]]"
"-[\\p{Control}]]"
, status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
UErrorCode status = U_ZERO_ERROR;
UnicodeSet s1("[:print:]", status);
TEST_ASSERT_SUCCESS(status);
UnicodeSet s2("[[:graph:][:blank:]-[\\p{Control}]]" ,status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(s1==s2);
}
}
/**
* Test cloning of UnicodeSet. For C++, we test the copy constructor.

View File

@ -1,5 +1,5 @@
######################################################################
# Copyright (c) 2003-2004, International Business Machines
# Copyright (c) 2003-2005, International Business Machines
# Corporation and others. All Rights Reserved.
######################################################################
# Author: Alan Liu
@ -42,3 +42,11 @@ nfcinert; NFC_Inert
nfkcinert; NFKC_Inert
segstart; Segment_Starter
# C/POSIX character classes that do not have Unicode property [value] aliases
# see uchar.h
n/a; alnum
n/a; blank
n/a; graph
n/a; print
n/a; xdigit

File diff suppressed because it is too large Load Diff

View File

@ -65,6 +65,9 @@ my $UNIDATA_DIR = "$ICU_DIR/source/data/unidata";
# Get the current year from the system
my $YEAR = 1900+@{[localtime]}[5]; # Get the current year
# Used to make "n/a" property aliases (Unicode or Synthetic) unique
my $propNA = 0;
#----------------------------------------------------------------------
# Top level property keys for binary, enumerated, string, and double props
my @TOP = qw( _bp _ep _sp _dp _mp );
@ -304,7 +307,7 @@ END
$i = $groupToInt{$groupString};
} else {
my @names = split(/\|/, $groupString);
die "Error: Wrong number of names in " . $groupString if (@names < 2);
die "Error: Wrong number of names in " . $groupString if (@names < 1);
$i = @nameGroups; # index of group we are making
$groupToInt{$groupString} = $i; # Cache for reuse
push @nameGroups, map { $stringToID{$_} } @names;
@ -589,7 +592,12 @@ sub merge_PropertyAliases {
die "Error: Property $long_name not found (or used more than once)";
}
my $value = $pa->{$long_name} . "|" . $long_name;
my $value;
if($pa->{$long_name} =~ m|^n/a\d*$|) {
$value = $long_name;
} else {
$value = $pa->{$long_name} . "|" . $long_name;
}
if (exists $additional_property_aliases{$long_name}) {
$value .= "|" . $additional_property_aliases{$long_name};
}
@ -689,8 +697,8 @@ sub merge_PropertyValueAliases {
my $l = $n;
my $r = $pva->{$n};
# convert |n/a\d*| to blank
$l = '' if ($l =~ m|^n/a\d+$|);
$r = '' if ($r =~ m|^n/a\d+$|);
$l = '' if ($l =~ m|^n/a\d*$|);
$r = '' if ($r =~ m|^n/a\d*$|);
$hh->{$enum} = "$l|$r";
# Don't delete the 'gc' properties because we need to share
@ -766,8 +774,6 @@ sub read_PropertyAliases {
my $in = new FileHandle($filename, 'r');
die "Error: Cannot open $filename" if (!defined $in);
my $sym = 0; # Used to make "n/a" strings unique
while (<$in>) {
# Read version (embedded in a comment)
@ -795,9 +801,12 @@ sub read_PropertyAliases {
}
# Make "n/a" strings unique
if ($short eq 'n/a') {
$short .= sprintf("%03d", $propNA++);
}
my $long = $fields[0];
if ($long eq 'n/a') {
$long .= sprintf("%03d", $sym++);
$long .= sprintf("%03d", $propNA++);
}
# Add long name->short name to the hash=pa hash table
@ -847,7 +856,7 @@ sub read_PropertyValueAliases {
my $in = new FileHandle($filename, 'r');
die "Error: Cannot open $filename" if (!defined $in);
my $sym = 0; # Used to make "n/a" strings unique
my $valueNA = 0; # Used to make "n/a" strings unique
while (<$in>) {
@ -868,7 +877,7 @@ sub read_PropertyValueAliases {
die "Error: Wrong number of fields in $filename"
if (@fields < 2 || @fields > 3);
# Make "n/a" strings unique
$fields[0] .= sprintf("%03d", $sym++) if ($fields[0] eq 'n/a');
$fields[0] .= sprintf("%03d", $valueNA++) if ($fields[0] eq 'n/a');
# Squash extra fields together
while (@fields > 2) {
my $f = pop @fields;