ICU-3170 update code for Unicode 4.0.1
X-SVN-Rev: 14844
This commit is contained in:
parent
72265d2950
commit
9b10636685
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1997-2003, International Business Machines
|
||||
* Copyright (C) 1997-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
@ -286,6 +286,17 @@ typedef enum UProperty {
|
||||
mapping or _in_ the target of a case mapping. Not the same as
|
||||
the general category Cased_Letter. @draft ICU 2.6 */
|
||||
UCHAR_CASE_SENSITIVE,
|
||||
/** Binary property STerm (new in Unicode 4.0.1).
|
||||
Sentence Terminal. Used in UAX #29: Text Boundaries
|
||||
(http://www.unicode.org/reports/tr29/)
|
||||
@draft ICU 3.0 */
|
||||
UCHAR_S_TERM,
|
||||
/** Binary property Variation_Selector (new in Unicode 4.0.1).
|
||||
Indicates all those characters that qualify as Variation Selectors.
|
||||
For details on the behavior of these characters,
|
||||
see StandardizedVariants.html and 15.6 Variation Selectors.
|
||||
@draft ICU 3.0 */
|
||||
UCHAR_VARIATION_SELECTOR,
|
||||
/** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */
|
||||
UCHAR_BINARY_LIMIT,
|
||||
|
||||
@ -956,6 +967,11 @@ enum UBlockCode {
|
||||
|
||||
/** @stable ICU 2.2 */
|
||||
UBLOCK_CYRILLIC_SUPPLEMENTARY = 97, /*[0500]*/
|
||||
/**
|
||||
* Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
|
||||
* @draft ICU 3.0
|
||||
*/
|
||||
UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/
|
||||
/** @stable ICU 2.2 */
|
||||
UBLOCK_TAGALOG = 98, /*[1700]*/
|
||||
/** @stable ICU 2.2 */
|
||||
@ -1215,6 +1231,8 @@ typedef enum ULineBreak {
|
||||
U_LB_HYPHEN, /*[HY]*/
|
||||
U_LB_IDEOGRAPHIC, /*[ID]*/
|
||||
U_LB_INSEPERABLE, /*[IN]*/
|
||||
/** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @draft ICU 3.0 */
|
||||
U_LB_INSEPARABLE=U_LB_INSEPERABLE,/*[IN]*/
|
||||
U_LB_INFIX_NUMERIC, /*[IS]*/
|
||||
U_LB_LINE_FEED, /*[LF]*/
|
||||
U_LB_NONSTARTER, /*[NS]*/
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
**********************************************************************
|
||||
* Copyright (C) 1997-2003, International Business Machines
|
||||
* Copyright (C) 1997-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
**********************************************************************
|
||||
*
|
||||
@ -83,6 +83,9 @@ typedef enum UScriptCode {
|
||||
USCRIPT_TAI_LE, /* Tale */
|
||||
USCRIPT_UGARITIC, /* Ugar */
|
||||
|
||||
/** New script code in Unicode 4.0.1 @draft ICU 3.0 */
|
||||
USCRIPT_KATAKANA_OR_HIRAGANA,/*Hrkt */
|
||||
|
||||
USCRIPT_CODE_LIMIT
|
||||
} UScriptCode;
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2003, International Business Machines
|
||||
* Copyright (C) 2002-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -214,7 +214,9 @@ static const struct {
|
||||
{ 1, U_MASK(UPROPS_WHITE_SPACE) },
|
||||
{ 1, U_MASK(UPROPS_XID_CONTINUE) },
|
||||
{ 1, U_MASK(UPROPS_XID_START) },
|
||||
{ -1, U_MASK(UPROPS_CASE_SENSITIVE_SHIFT) }
|
||||
{ -1, U_MASK(UPROPS_CASE_SENSITIVE_SHIFT) },
|
||||
{ 2, U_MASK(UPROPS_V2_S_TERM) },
|
||||
{ 2, U_MASK(UPROPS_V2_VARIATION_SELECTOR) }
|
||||
};
|
||||
|
||||
U_CAPI UBool U_EXPORT2
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2003, International Business Machines
|
||||
* Copyright (C) 2002-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -164,6 +164,7 @@ enum {
|
||||
/*
|
||||
* Properties in vector word 2
|
||||
* Bits
|
||||
* 31..24 More binary properties
|
||||
* 13..11 Joining Type
|
||||
* 10.. 5 Joining Group
|
||||
* 4.. 0 Decomposition Type
|
||||
@ -176,6 +177,12 @@ enum {
|
||||
|
||||
#define UPROPS_DT_MASK 0x0000001f
|
||||
|
||||
/*
 * Bit positions for the binary properties kept in the upper bits of
 * properties vector word 2 (the layout comment for that word lists
 * bits 31..24 as "More binary properties").
 * Enumerators without an explicit value continue counting up from 24.
 */
enum {
|
||||
UPROPS_V2_S_TERM=24, /* first bit of the binary-property range; new in ICU 3.0 and Unicode 4.0.1 */
|
||||
UPROPS_V2_VARIATION_SELECTOR, /* next bit after UPROPS_V2_S_TERM */
|
||||
UPROPS_V2_TOP /* one more than the last bit position above; must be <=32 (presumably because the vector word is 32 bits wide - confirm) */
|
||||
};
|
||||
|
||||
/**
|
||||
* Get a properties vector word for a code point.
|
||||
* Implemented in uchar.c for uprops.c.
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2001-2003, International Business Machines
|
||||
* Copyright (C) 2001-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -246,8 +246,22 @@ derivedNormalizationPropertiesLineFn(void *context,
|
||||
qcFlags&=0xf;
|
||||
} else if(0==uprv_memcmp(s, "MAYBE", 5)) {
|
||||
qcFlags&=0x30;
|
||||
} else if(0==uprv_memcmp(s, "QC", 2) && *(s=(char *)u_skipWhitespace(s+2))==';') {
|
||||
/*
|
||||
* Unicode 4.0.1:
|
||||
* changes single field "NFD_NO" -> two fields "NFD_QC; N" etc.
|
||||
*/
|
||||
/* start of the field */
|
||||
s=(char *)u_skipWhitespace(s+1);
|
||||
if(*s=='N') {
|
||||
qcFlags&=0xf;
|
||||
} else if(*s=='M') {
|
||||
qcFlags&=0x30;
|
||||
} else {
|
||||
return; /* do nothing for "Yes" because it's the default value */
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
return; /* do nothing for "Yes" because it's the default value */
|
||||
}
|
||||
|
||||
/* set this flag for all code points in this range */
|
||||
@ -259,7 +273,10 @@ derivedNormalizationPropertiesLineFn(void *context,
|
||||
while(start<=end) {
|
||||
setCompositionExclusion(start++);
|
||||
}
|
||||
} else if(0==uprv_memcmp(s, "FNC", 3) && *(s=(char *)u_skipWhitespace(s+3))==';') {
|
||||
} else if(
|
||||
(0==uprv_memcmp(s, "FNC", 3) || 0==uprv_memcmp(s, "FC_NFKC", 7))
|
||||
&& *(s=(char *)u_skipWhitespace(s+3))==';'
|
||||
) {
|
||||
/* FC_NFKC_Closure, parse field 2 to get the string */
|
||||
char *t;
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
*******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2002-2003, International Business Machines
|
||||
* Copyright (C) 2002-2004, International Business Machines
|
||||
* Corporation and others. All Rights Reserved.
|
||||
*
|
||||
*******************************************************************************
|
||||
@ -265,7 +265,11 @@ propListNames[]={
|
||||
{ "Unified_Ideograph", 1, UPROPS_UNIFIED_IDEOGRAPH },
|
||||
{ "Deprecated", 1, UPROPS_DEPRECATED },
|
||||
{ "Soft_Dotted", 1, UPROPS_SOFT_DOTTED },
|
||||
{ "Logical_Order_Exception", 1, UPROPS_LOGICAL_ORDER_EXCEPTION }
|
||||
{ "Logical_Order_Exception", 1, UPROPS_LOGICAL_ORDER_EXCEPTION },
|
||||
|
||||
/* new properties in Unicode 4.0.1 */
|
||||
{ "STerm", 2, UPROPS_V2_S_TERM },
|
||||
{ "Variation_Selector", 2, UPROPS_V2_VARIATION_SELECTOR }
|
||||
};
|
||||
|
||||
static const Binaries
|
||||
@ -399,7 +403,7 @@ generateAdditionalProperties(char *filename, const char *suffix, UErrorCode *pEr
|
||||
/* process various UCD .txt files */
|
||||
|
||||
/* add Han numeric types & values */
|
||||
parseMultiFieldFile(filename, basename, "DerivedNumericValues", suffix, 3, numericLineFn, pErrorCode);
|
||||
parseMultiFieldFile(filename, basename, "DerivedNumericValues", suffix, 2, numericLineFn, pErrorCode);
|
||||
|
||||
/* set proper bidi class for unassigned code points (Cn) */
|
||||
parseTwoFieldFile(filename, basename, "DerivedBidiClass", suffix, bidiClassLineFn, pErrorCode);
|
||||
@ -537,7 +541,7 @@ numericLineFn(void *context,
|
||||
Props newProps;
|
||||
char *s, *end;
|
||||
uint32_t start, limit, value, oldProps32;
|
||||
int32_t type, oldType;
|
||||
int32_t oldType;
|
||||
char c;
|
||||
UBool isFraction;
|
||||
|
||||
@ -586,26 +590,22 @@ numericLineFn(void *context,
|
||||
}
|
||||
}
|
||||
|
||||
/* parse numeric type */
|
||||
s=trimTerminateField(fields[2][0], fields[2][1]);
|
||||
type=u_getPropertyValueEnum(UCHAR_NUMERIC_TYPE, s);
|
||||
if(type<=0) {
|
||||
fprintf(stderr, "genprops error: unknown numeric type in DerivedNumericValues.txt field 1 at %s\n", s);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
/*
|
||||
* Unicode 4.0.1 removes the third column that used to list the numeric type.
|
||||
* Assume that either the data is the same as in UnicodeData.txt,
|
||||
* or else that the numeric type is "numeric".
|
||||
* This should work because we only expect to add numeric values for
|
||||
* Han characters; for those, UnicodeData.txt lists only ranges without
|
||||
* specific properties for single characters.
|
||||
*/
|
||||
|
||||
for(; start<limit; ++start) {
|
||||
oldProps32=getProps(start);
|
||||
oldType=(int32_t)GET_NUMERIC_TYPE(oldProps32);
|
||||
if(oldType==type) {
|
||||
if(oldType!=0) {
|
||||
/* this code point was already listed with its numeric value in UnicodeData.txt */
|
||||
continue;
|
||||
}
|
||||
if(oldType!=0) {
|
||||
/* the numeric type differs from what we got from UnicodeData.txt */
|
||||
fprintf(stderr, "genprops error: new numeric value for an already numeric character in DerivedNumericValues.txt at %s\n", fields[0][0]);
|
||||
exit(U_PARSE_ERROR);
|
||||
}
|
||||
|
||||
/*
|
||||
* Do not set a numeric value for code points that have other
|
||||
@ -630,7 +630,7 @@ numericLineFn(void *context,
|
||||
}
|
||||
|
||||
if(beVerbose) {
|
||||
printf("adding U+%04x numeric type %d value %u\n", start, type, value);
|
||||
printf("adding U+%04x numeric type %d value %u\n", start, U_NT_NUMERIC, value);
|
||||
}
|
||||
|
||||
/* reconstruct the properties and set the new numeric type and value */
|
||||
@ -639,8 +639,8 @@ numericLineFn(void *context,
|
||||
newProps.generalCategory=(uint8_t)GET_CATEGORY(oldProps32);
|
||||
newProps.bidi=(uint8_t)GET_BIDI_CLASS(oldProps32);
|
||||
newProps.isMirrored=(uint8_t)(oldProps32&(1UL<<UPROPS_MIRROR_SHIFT) ? TRUE : FALSE);
|
||||
newProps.numericType=(uint8_t)type; /* newly parsed numeric type */
|
||||
newProps.numericValue=(int32_t)value; /* newly parsed numeric value */
|
||||
newProps.numericType=(uint8_t)U_NT_NUMERIC; /* assumed numeric type, see Unicode 4.0.1 comment */
|
||||
newProps.numericValue=(int32_t)value; /* newly parsed numeric value */
|
||||
addProps(start, makeProps(&newProps));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user