2004-05-08 08:48:01 +00:00
|
|
|
/*
|
|
|
|
*******************************************************************************
|
|
|
|
* Copyright (C) 2004, International Business Machines
|
|
|
|
* Corporation and others. All Rights Reserved.
|
|
|
|
*******************************************************************************
|
|
|
|
* file name: ucol_sit.cpp
|
|
|
|
* encoding: US-ASCII
|
|
|
|
* tab size: 8 (not used)
|
|
|
|
* indentation:4
|
|
|
|
*
|
|
|
|
* Modification history
|
|
|
|
* Date Name Comments
|
|
|
|
* 03/12/2004 weiv Creation
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "utracimp.h"
|
|
|
|
#include "ucol_imp.h"
|
|
|
|
#include "unormimp.h"
|
2004-05-13 09:02:33 +00:00
|
|
|
#include "cmemory.h"
|
|
|
|
#include "cstring.h"
|
2004-05-08 08:48:01 +00:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
struct FromShortString {
|
|
|
|
UVersion UCAVersion;
|
|
|
|
char locale[256];
|
|
|
|
UChar variableTop[256];
|
|
|
|
|
|
|
|
};
|
|
|
|
*/
|
|
|
|
|
2004-05-13 09:02:33 +00:00
|
|
|
|
|
|
|
/**
 * Names for the rows of the short-string option table.
 * UCOL_SIT_ITEMS_COUNT is used as the size of that table, so it must stay last.
 */
enum OptionsList {
    /* locale parts */
    UCOL_SIT_LANGUAGE            = 0,
    UCOL_SIT_SCRIPT              = 1,
    UCOL_SIT_REGION              = 2,
    UCOL_SIT_VARIANT             = 3,
    UCOL_SIT_KEYWORD             = 4,
    /* complete locale name */
    UCOL_SIT_RFC3166BIS          = 5,
    /* collation attributes */
    UCOL_SIT_STRENGTH            = 6,
    UCOL_SIT_CASE_LEVEL          = 7,
    UCOL_SIT_CASE_FIRST          = 8,
    UCOL_SIT_NUMERIC_COLLATION   = 9,
    UCOL_SIT_ALTERNATE_HANDLING  = 10,
    UCOL_SIT_NORMALIZATION_MODE  = 11,
    UCOL_SIT_FRENCH_COLLATION    = 12,
    UCOL_SIT_HIRAGANA_QUATERNARY = 13,
    UCOL_SIT_VARIABLE_TOP        = 14,
    /* number of options above */
    UCOL_SIT_ITEMS_COUNT         = 15
};
|
|
|
|
|
|
|
|
/* Number of locale element slots kept in a CollatorSpec. */
const int32_t locElementCount = 5;

/* Array length, in chars, of each locale element buffer. */
const int32_t locElementLen = 32;
|
|
|
|
|
|
|
|
struct CollatorSpec {
|
|
|
|
char locElements[locElementCount][locElementLen];
|
|
|
|
char locale[512];
|
|
|
|
UColAttributeValue options[UCOL_ATTRIBUTE_COUNT];
|
2004-05-13 20:45:53 +00:00
|
|
|
uint32_t variableTopValue;
|
|
|
|
UChar variableTopString[locElementLen];
|
2004-05-13 09:02:33 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2004-05-13 20:50:40 +00:00
|
|
|
U_CDECL_BEGIN
|
2004-05-13 09:02:33 +00:00
|
|
|
typedef const char* U_CALLCONV
|
2004-05-17 06:33:22 +00:00
|
|
|
ActionFunction(CollatorSpec *spec, uint32_t value1, const char* string,
|
2004-05-13 09:02:33 +00:00
|
|
|
UErrorCode *status);
|
2004-05-13 20:50:40 +00:00
|
|
|
U_CDECL_END
|
2004-05-13 09:02:33 +00:00
|
|
|
|
|
|
|
struct AttributeConversion {
|
|
|
|
char letter;
|
|
|
|
UColAttributeValue value;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Every value letter accepted by _processCollatorOption, in letter order. */
static const AttributeConversion conversions[12] = {
    /* strengths */
    { '1', UCOL_PRIMARY       },
    { '2', UCOL_SECONDARY     },
    { '3', UCOL_TERTIARY      },
    { '4', UCOL_QUATERNARY    },
    /* letters */
    { 'D', UCOL_DEFAULT       },
    { 'I', UCOL_IDENTICAL     },
    { 'L', UCOL_LOWER_FIRST   },
    { 'N', UCOL_NON_IGNORABLE },
    { 'O', UCOL_ON            },
    { 'S', UCOL_SHIFTED       },
    { 'U', UCOL_UPPER_FIRST   },
    { 'X', UCOL_OFF           }
};
|
|
|
|
|
|
|
|
struct ShortStringOptions {
|
|
|
|
char optionStart;
|
2004-05-13 09:02:33 +00:00
|
|
|
ActionFunction *action;
|
|
|
|
uint32_t attr;
|
2004-05-08 08:48:01 +00:00
|
|
|
};
|
|
|
|
|
2004-05-13 09:02:33 +00:00
|
|
|
|
|
|
|
U_CDECL_BEGIN
|
|
|
|
static const char* U_CALLCONV
|
2004-05-17 06:33:22 +00:00
|
|
|
_processLocaleElement(CollatorSpec *spec, uint32_t value, const char* string,
|
2004-05-13 09:02:33 +00:00
|
|
|
UErrorCode *status) {
|
|
|
|
int32_t len = 0;
|
|
|
|
do {
|
|
|
|
spec->locElements[value][len++] = *string;
|
|
|
|
} while(*(++string) != '_' && *string && len < locElementLen);
|
|
|
|
if(len >= locElementLen) {
|
|
|
|
*status = U_BUFFER_OVERFLOW_ERROR;
|
|
|
|
return string;
|
|
|
|
}
|
|
|
|
// skip the underscore at the end
|
2004-05-17 06:33:22 +00:00
|
|
|
return ++string;
|
2004-05-13 09:02:33 +00:00
|
|
|
}
|
2004-05-13 20:45:53 +00:00
|
|
|
U_CDECL_END
|
2004-05-13 09:02:33 +00:00
|
|
|
|
2004-05-13 20:45:53 +00:00
|
|
|
U_CDECL_BEGIN
|
2004-05-13 09:02:33 +00:00
|
|
|
static const char* U_CALLCONV
|
2004-05-17 06:33:22 +00:00
|
|
|
_processRFC3166Locale(CollatorSpec *spec, uint32_t value1, const char* string,
|
2004-05-13 09:02:33 +00:00
|
|
|
UErrorCode *status) {
|
2004-05-17 06:33:22 +00:00
|
|
|
return string;
|
2004-05-13 09:02:33 +00:00
|
|
|
}
|
2004-05-13 20:45:53 +00:00
|
|
|
U_CDECL_END
|
2004-05-13 09:02:33 +00:00
|
|
|
|
2004-05-13 20:45:53 +00:00
|
|
|
U_CDECL_BEGIN
|
2004-05-13 09:02:33 +00:00
|
|
|
static const char* U_CALLCONV
|
2004-05-17 06:33:22 +00:00
|
|
|
_processCollatorOption(CollatorSpec *spec, uint32_t option, const char* string,
|
2004-05-13 09:02:33 +00:00
|
|
|
UErrorCode *status) {
|
|
|
|
int32_t i = 0;
|
2004-05-17 06:33:22 +00:00
|
|
|
for(i = 0; i < (int32_t)(sizeof(conversions)/sizeof(conversions[0])); i++) {
|
2004-05-13 09:02:33 +00:00
|
|
|
if(*string == conversions[i].letter) {
|
|
|
|
spec->options[option] = conversions[i].value;
|
|
|
|
if(*(++string) != '_' && *string) {
|
|
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
|
|
return string;
|
|
|
|
}
|
|
|
|
return ++string;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
|
|
return string;
|
|
|
|
}
|
2004-05-13 20:45:53 +00:00
|
|
|
U_CDECL_END
|
|
|
|
|
2004-05-13 09:02:33 +00:00
|
|
|
|
2004-05-17 06:33:22 +00:00
|
|
|
/**
 * Reads one UTF-16 code unit written as exactly four hexadecimal digits.
 *
 * @param string in/out: points at the first digit on entry and at the first
 *               unconsumed character on return
 * @param status set to U_ILLEGAL_ARGUMENT_ERROR when a non-hex character is
 *               met, or when the string ends before four digits were read
 * @return the code unit; 0 when a non-hex character was met
 */
static UChar
readHexCodeUnit(const char **string, UErrorCode *status) {
    UChar result = 0;
    int32_t value = 0;
    char c;
    int32_t noDigits = 0;
    while((c = **string) != 0 && noDigits < 4) {
        if( c >= '0' && c <= '9') {
            value = c - '0';
        } else if ( c >= 'a' && c <= 'f') {
            value = c - 'a' + 10;
        } else if ( c >= 'A' && c <= 'F') {
            value = c - 'A' + 10;
        } else {
            *status = U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
        result = (UChar)((result << 4) | value);
        // count the digit, otherwise the four-digit limit never applies
        noDigits++;
        (*string)++;
    }
    // the string ended before a whole code unit was read
    if(noDigits < 4) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
    }
    return result;
}
|
2004-05-13 09:02:33 +00:00
|
|
|
|
2004-05-13 20:45:53 +00:00
|
|
|
U_CDECL_BEGIN
|
2004-05-13 09:02:33 +00:00
|
|
|
static const char* U_CALLCONV
|
|
|
|
_processVariableTop(CollatorSpec *spec, uint32_t value1, const char* string, UErrorCode *status) {
|
2004-05-13 20:45:53 +00:00
|
|
|
// get four digits
|
|
|
|
return string;
|
2004-05-13 09:02:33 +00:00
|
|
|
}
|
|
|
|
U_CDECL_END
|
|
|
|
|
2004-05-17 06:33:22 +00:00
|
|
|
/*
 * Option table: the letter that starts each option, the handler that parses
 * what follows, and the number handed to that handler. For locale elements
 * the number is the slot in CollatorSpec::locElements; for collation
 * attributes it is the attribute to set.
 *
 * The variable top option is routed to _processVariableTop. Sending it to
 * _processCollatorOption with 0 would overwrite the attribute whose index
 * is 0 whenever 'T' is followed by a valid value letter.
 */
static ShortStringOptions options[UCOL_SIT_ITEMS_COUNT] =
{
/* 00 UCOL_SIT_LANGUAGE */            { 'L', _processLocaleElement,  0 }, // language
/* 01 UCOL_SIT_SCRIPT */              { 'Z', _processLocaleElement,  1 }, // script
/* 02 UCOL_SIT_REGION */              { 'R', _processLocaleElement,  2 }, // region
/* 03 UCOL_SIT_VARIANT */             { 'V', _processLocaleElement,  3 }, // variant
/* 04 UCOL_SIT_KEYWORD */             { 'K', _processLocaleElement,  4 }, // keyword
/* 05 UCOL_SIT_RFC3166BIS */          { 'X', _processRFC3166Locale,  0 }, // rfc3166bis locale name
/* 06 UCOL_SIT_STRENGTH */            { 'S', _processCollatorOption, UCOL_STRENGTH }, // strength 1, 2, 3, 4, I, D
/* 07 UCOL_SIT_CASE_LEVEL */          { 'E', _processCollatorOption, UCOL_CASE_LEVEL }, // case level O, X, D
/* 08 UCOL_SIT_CASE_FIRST */          { 'C', _processCollatorOption, UCOL_CASE_FIRST }, // case first L, U, X, D
/* 09 UCOL_SIT_NUMERIC_COLLATION */   { 'D', _processCollatorOption, UCOL_NUMERIC_COLLATION }, // codan O, X, D
/* 10 UCOL_SIT_ALTERNATE_HANDLING */  { 'A', _processCollatorOption, UCOL_ALTERNATE_HANDLING }, // alternate N, S, D
/* 11 UCOL_SIT_NORMALIZATION_MODE */  { 'N', _processCollatorOption, UCOL_NORMALIZATION_MODE }, // norm O, X, D
/* 12 UCOL_SIT_FRENCH_COLLATION */    { 'F', _processCollatorOption, UCOL_FRENCH_COLLATION }, // french O, X, D
/* 13 UCOL_SIT_HIRAGANA_QUATERNARY */ { 'H', _processCollatorOption, UCOL_HIRAGANA_QUATERNARY_MODE }, // hiragana O, X, D
/* 14 UCOL_SIT_VARIABLE_TOP */        { 'T', _processVariableTop,    0 }  // variable top
};
|
|
|
|
|
|
|
|
|
2004-05-13 09:02:33 +00:00
|
|
|
static
|
2004-05-17 06:33:22 +00:00
|
|
|
const char* ucol_sit_readOption(const char *start, CollatorSpec *spec,
|
2004-05-13 09:02:33 +00:00
|
|
|
UErrorCode *status) {
|
2004-05-17 06:33:22 +00:00
|
|
|
int32_t i = 0;
|
2004-05-13 09:02:33 +00:00
|
|
|
|
|
|
|
for(i = 0; i < UCOL_SIT_ITEMS_COUNT; i++) {
|
|
|
|
if(*start == options[i].optionStart) {
|
|
|
|
return options[i].action(spec, options[i].attr, start+1, status);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*status = U_ILLEGAL_ARGUMENT_ERROR;
|
|
|
|
return start;
|
|
|
|
}
|
|
|
|
|
|
|
|
static
|
|
|
|
void ucol_sit_initCollatorSpecs(CollatorSpec *spec) {
|
|
|
|
// reset everything
|
|
|
|
uprv_memset(spec, 0, sizeof(CollatorSpec));
|
|
|
|
// set collation options to default
|
|
|
|
int32_t i = 0;
|
|
|
|
for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
|
|
|
|
spec->options[i] = UCOL_DEFAULT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-05-17 06:33:22 +00:00
|
|
|
/**
|
2004-05-08 08:48:01 +00:00
|
|
|
* Open a collator defined by a short form string.
|
|
|
|
* The structure and the syntax of the string is defined in the "Naming collators"
|
2004-05-17 06:33:22 +00:00
|
|
|
* section of the users guide:
|
2004-05-08 08:48:01 +00:00
|
|
|
* http://oss.software.ibm.com/icu/userguide/Collate_Concepts.html#Naming_Collators
|
2004-05-17 06:33:22 +00:00
|
|
|
* The call to this function is equivalent to a call to ucol_open, followed by a
|
2004-05-08 08:48:01 +00:00
|
|
|
* series of calls to ucol_setAttribute and ucol_setVariableTop.
|
2004-05-13 20:45:53 +00:00
|
|
|
* Attributes are overriden by the subsequent attributes. So, for "S2_S3", final
|
2004-05-17 06:33:22 +00:00
|
|
|
* strength will be 3. 3066bis locale overrides individual locale parts.
|
|
|
|
* @param definition A short string containing a locale and a set of attributes.
|
2004-05-08 08:48:01 +00:00
|
|
|
* Attributes not explicitly mentioned are left at the default
|
|
|
|
* state for a locale.
|
|
|
|
* @param parseError if not NULL, structure that will get filled with error's pre
|
|
|
|
* and post context in case of error.
|
2004-05-17 06:33:22 +00:00
|
|
|
* @param status Error code. Apart from regular error conditions connected to
|
2004-05-08 08:48:01 +00:00
|
|
|
* instantiating collators (like out of memory or similar), this
|
|
|
|
* API will return an error if an invalid attribute or attribute/value
|
|
|
|
* combination is specified.
|
2004-05-17 06:33:22 +00:00
|
|
|
* @return A pointer to a UCollator or 0 if an error occured (including an
|
2004-05-08 08:48:01 +00:00
|
|
|
* invalid attribute).
|
|
|
|
* @see ucol_open
|
|
|
|
* @see ucol_setAttribute
|
|
|
|
* @see ucol_setVariableTop
|
|
|
|
* @draft ICU 3.0
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
U_CAPI UCollator* U_EXPORT2
|
|
|
|
ucol_openFromShortString( const char *definition,
|
|
|
|
UParseError *parseError,
|
|
|
|
UErrorCode *status)
|
|
|
|
{
|
|
|
|
UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN_FROM_SHORT_STRING);
|
|
|
|
UTRACE_DATA1(UTRACE_INFO, "short string = \"%s\"", definition);
|
|
|
|
|
|
|
|
if(U_FAILURE(*status)) return 0;
|
|
|
|
|
|
|
|
// first we want to pick stuff out of short string.
|
|
|
|
// we'll end up with an UCA version, locale and a bunch of
|
|
|
|
// settings
|
|
|
|
|
|
|
|
// analyse the string in order to get everything we need.
|
2004-05-13 09:02:33 +00:00
|
|
|
int32_t definitionLen = uprv_strlen(definition);
|
|
|
|
const char *definitionEnd = definition+definitionLen;
|
|
|
|
const char *string = definition;
|
|
|
|
CollatorSpec s;
|
|
|
|
ucol_sit_initCollatorSpecs(&s);
|
|
|
|
while(U_SUCCESS(*status) && string < definitionEnd) {
|
|
|
|
string = ucol_sit_readOption(string, &s, status);
|
|
|
|
}
|
2004-05-17 06:33:22 +00:00
|
|
|
|
2004-05-13 09:02:33 +00:00
|
|
|
if(U_FAILURE(*status)) {
|
|
|
|
parseError->line = 0;
|
|
|
|
parseError->offset = string - definition;
|
|
|
|
// perhaps just stuff chars in UChar[]?
|
|
|
|
parseError->preContext[0] = 0;
|
|
|
|
parseError->postContext[0] = 0;
|
|
|
|
}
|
2004-05-17 06:33:22 +00:00
|
|
|
|
2004-05-13 09:02:33 +00:00
|
|
|
// put the locale together, unless we have a done
|
|
|
|
// locale
|
|
|
|
int32_t i = 0;
|
|
|
|
if(s.locale[0] == 0) {
|
|
|
|
// first the language
|
|
|
|
uprv_strcat(s.locale, s.locElements[0]);
|
|
|
|
// then the script, if present
|
|
|
|
if(*(s.locElements[1])) {
|
|
|
|
uprv_strcat(s.locale, "_");
|
|
|
|
uprv_strcat(s.locale, s.locElements[1]);
|
|
|
|
}
|
|
|
|
// then the region, if present
|
|
|
|
if(*(s.locElements[2])) {
|
|
|
|
uprv_strcat(s.locale, "_");
|
|
|
|
uprv_strcat(s.locale, s.locElements[2]);
|
|
|
|
} else if(*(s.locElements[3])) { // if there is a variant, we need an underscore
|
|
|
|
uprv_strcat(s.locale, "_");
|
2004-05-17 06:33:22 +00:00
|
|
|
}
|
2004-05-13 09:02:33 +00:00
|
|
|
// add variant, if there
|
2004-05-17 06:33:22 +00:00
|
|
|
if(*(s.locElements[3])) {
|
2004-05-13 09:02:33 +00:00
|
|
|
uprv_strcat(s.locale, "_");
|
|
|
|
uprv_strcat(s.locale, s.locElements[3]);
|
|
|
|
}
|
|
|
|
|
|
|
|
// if there is a collation keyword, add that too
|
|
|
|
if(*(s.locElements[4])) {
|
|
|
|
uprv_strcat(s.locale, "@collation=");
|
|
|
|
uprv_strcat(s.locale, s.locElements[4]);
|
|
|
|
}
|
|
|
|
}
|
2004-05-08 08:48:01 +00:00
|
|
|
|
|
|
|
const UCollator* UCA = ucol_initUCA(status);
|
|
|
|
|
2004-05-13 09:02:33 +00:00
|
|
|
UCollator *result = ucol_open(s.locale, status);
|
|
|
|
|
|
|
|
for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
|
|
|
|
if(s.options[i] != UCOL_DEFAULT) {
|
|
|
|
ucol_setAttribute(result, (UColAttribute)i, s.options[i], status);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(U_FAILURE(*status)) { // here it can only be a bogus value
|
|
|
|
ucol_close(result);
|
|
|
|
result = NULL;
|
|
|
|
}
|
2004-05-17 06:33:22 +00:00
|
|
|
|
2004-05-08 08:48:01 +00:00
|
|
|
UTRACE_EXIT_PTR_STATUS(result, *status);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2004-05-13 20:45:53 +00:00
|
|
|
/**
 * Work in progress: collects the locale parts and the attribute settings of
 * a collator into a CollatorSpec. No string is produced yet; buffer and
 * capacity are not used and the function always returns 0.
 *
 * The locale parts are stored in the same slots that the short-string
 * parser uses: 0 language, 1 script, 2 region, 3 variant, 4 collation
 * keyword.
 *
 * @param coll     collator whose attributes and variable top are read
 * @param locale   locale to take apart; may be NULL
 * @param buffer   currently unused
 * @param capacity currently unused
 * @param status   error code; set to U_BUFFER_OVERFLOW_ERROR when locale
 *                 does not fit into the internal locale buffer
 * @return 0
 */
U_CAPI int32_t U_EXPORT2
ucol_getShortDefinitionString(const UCollator *coll,
                              const char *locale,
                              char *buffer,
                              int32_t capacity,
                              UErrorCode *status)
{
    if(U_FAILURE(*status)) return 0;
    CollatorSpec s;
    ucol_sit_initCollatorSpecs(&s);

    if(locale) {
        // s.locale has a fixed size; refuse anything that would overflow it
        if(uprv_strlen(locale) >= sizeof(s.locale)) {
            *status = U_BUFFER_OVERFLOW_ERROR;
            return 0;
        }
        uprv_strcpy(s.locale, locale);
        uloc_getLanguage(locale, s.locElements[0], locElementLen, status);
        uloc_getScript(locale, s.locElements[1], locElementLen, status);
        uloc_getCountry(locale, s.locElements[2], locElementLen, status);
        uloc_getVariant(locale, s.locElements[3], locElementLen, status);
        uloc_getKeywordValue(locale, "collation", s.locElements[4], locElementLen, status);
    }

    int32_t i = 0;
    for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
        s.options[i] = ucol_getAttribute(coll, (UColAttribute)i, status);
    }
    s.variableTopValue = ucol_getVariableTop(coll, status);

    return 0;
}
|
|
|
|
|
|
|
|
/**
 * Not implemented yet: no argument is read or written.
 *
 * @return 0
 */
U_CAPI int32_t U_EXPORT2
ucol_normalizeShortDefinitionString(const char *source,
                                    char *destination,
                                    int32_t capacity,
                                    UParseError *parseError,
                                    UErrorCode *status)
{
    (void)source;
    (void)destination;
    (void)capacity;
    (void)parseError;
    (void)status;
    return 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2004-05-08 08:48:01 +00:00
|
|
|
U_CDECL_BEGIN
|
|
|
|
static UBool U_CALLCONV
|
|
|
|
_processContractions(const void *context, UChar32 start, UChar32 limit, uint32_t value) {
|
|
|
|
UErrorCode status = U_ZERO_ERROR;
|
|
|
|
USet *unsafe = (USet *)context;
|
|
|
|
UChar contraction[256];
|
2004-05-17 06:33:22 +00:00
|
|
|
if(value > UCOL_NOT_FOUND && getCETag(value) == CONTRACTION_TAG) {
|
2004-05-08 08:48:01 +00:00
|
|
|
// this is a contraction
|
|
|
|
// we want to add the code point for sure
|
|
|
|
while(start < limit) {
|
|
|
|
//uset_add(unsafe, start);
|
|
|
|
contraction[0] = (UChar)start;
|
|
|
|
// get the rest of the contraction string from the data structure
|
|
|
|
start++;
|
|
|
|
}
|
2004-05-17 06:33:22 +00:00
|
|
|
// check if there is anything else to add - if these lead
|
2004-05-08 08:48:01 +00:00
|
|
|
// to a longer contraction
|
|
|
|
}
|
|
|
|
if(U_FAILURE(status)) {
|
2004-05-17 06:33:22 +00:00
|
|
|
return FALSE;
|
2004-05-08 08:48:01 +00:00
|
|
|
} else {
|
2004-05-17 06:33:22 +00:00
|
|
|
return TRUE;
|
2004-05-08 08:48:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
U_CDECL_END
|
|
|
|
|
|
|
|
/**
 * Folding-offset callback that ucol_getContractions installs on the
 * collator's mapping trie before enumerating it.
 *
 * @param data trie data word
 * @return the low 24 bits of data
 */
static int32_t U_CALLCONV
_getTrieFoldingOffset(uint32_t data) {
    const uint32_t low24Bits = 0x00FFFFFF;
    return (int32_t)(data & low24Bits);
}
|
|
|
|
|
|
|
|
/**
 * Fills a set with code points treated as unsafe: the Thai/Lao prevowel
 * ranges, all surrogates, and every code point whose FCD value is not 0.
 * The set is cleared first.
 *
 * @param coll   collator; not consulted by the current implementation
 * @param unsafe set that receives the result
 * @param status error code; on failure the set is left untouched
 * @return the size of the set, or 0 on failure
 */
U_CAPI int32_t U_EXPORT2
ucol_getUnsafeSet( const UCollator *coll,
                   USet *unsafe,
                   UErrorCode *status)
{
    (void)coll;

    if(U_FAILURE(*status)) {
        return 0;
    }

    // get the FCD data before touching the set, so that a failure to load
    // it leaves the caller's set as it was
    const uint16_t *fcdTrieIndex=unorm_getFCDTrie(status);
    if(U_FAILURE(*status) || fcdTrieIndex == NULL) {
        return 0;
    }

    uset_clear(unsafe);
    // add Thai/Lao prevowels
    uset_addRange(unsafe, 0xe40, 0xe44);
    uset_addRange(unsafe, 0xec0, 0xec4);
    // add lead/trail surrogates
    uset_addRange(unsafe, 0xd800, 0xdfff);

    // add FCD things
    int32_t i = 0;

    // add unsafe BMPs
    uint16_t fcd, leadFCD;
    UChar32 c;
    for(c = 0; c <= 0xffff; c++) {
        if(c==0xd800) {
            // surrogates were added as a range above
            c=0xe000;
        }
        fcd = unorm_getFCD16(fcdTrieIndex, (UChar)c);
        if (fcd != 0) {
            uset_add(unsafe, c);
        }
    }

    // add unsafe supplementaries
    for(c = 0x10000; c < 0x110000; ) {
        leadFCD=unorm_getFCD16(fcdTrieIndex, U16_LEAD(c));
        if(leadFCD==0) {
            // nothing below this lead surrogate has FCD data
            c+=0x400;
        } else {
            for(i=0; i<0x400; ++c, ++i) {
                // either i or U16_TRAIL(c) can be used because only the lower 10 bits are relevant
                // NOTE(review): unormimp.h declares the second argument as the
                // lead surrogate's FCD value, not the lead surrogate - confirm
                fcd = unorm_getFCD16FromSurrogatePair(fcdTrieIndex, leadFCD, U16_TRAIL(c));
                if (fcd != 0) {
                    uset_add(unsafe, c);
                }
            }
        }
    }

    return uset_size(unsafe);
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Get a set containing the contractions defined by the collator. The set includes
|
|
|
|
* both the UCA contractions and the contractions defined by the collator
|
2004-05-17 06:33:22 +00:00
|
|
|
* @param coll collator
|
2004-05-08 08:48:01 +00:00
|
|
|
* @param conts the set to hold the result
|
|
|
|
* @param status to hold the error code
|
|
|
|
* @return the size of the contraction set
|
|
|
|
*
|
|
|
|
* @draft ICU 3.0
|
|
|
|
*/
|
|
|
|
U_CAPI int32_t U_EXPORT2
|
|
|
|
ucol_getContractions( const UCollator *coll,
|
|
|
|
USet *contractions,
|
2004-05-17 06:33:22 +00:00
|
|
|
UErrorCode *status)
|
2004-05-08 08:48:01 +00:00
|
|
|
{
|
|
|
|
// add contractions from the UCA
|
|
|
|
int32_t width = coll->UCA->image->contractionUCACombosWidth;
|
|
|
|
int32_t size = coll->UCA->image->contractionUCACombosSize;
|
|
|
|
UChar *conts = (UChar *)((uint8_t *)coll->UCA->image + coll->UCA->image->contractionUCACombos);
|
|
|
|
int32_t i = 0;
|
|
|
|
while(i < size * width) {
|
|
|
|
if(*(conts + i + 2)) {
|
|
|
|
uset_addString(contractions, conts+i, 3);
|
|
|
|
} else {
|
|
|
|
uset_addString(contractions, conts+i, 2);
|
|
|
|
}
|
|
|
|
|
|
|
|
i += 3;
|
|
|
|
}
|
|
|
|
// This is collator specific. Add contractions from a collator
|
|
|
|
coll->mapping->getFoldingOffset = _getTrieFoldingOffset;
|
|
|
|
utrie_enum(coll->mapping, NULL, _processContractions, contractions);
|
|
|
|
|
|
|
|
return uset_size(contractions);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Not implemented yet: neither argument is used.
 *
 * @return 0
 */
U_CAPI uint32_t U_EXPORT2
ucol_collatorToIdentifier(const UCollator *coll,
                          UErrorCode *status)
{
    (void)coll;
    (void)status;
    return 0;
}
|
|
|
|
|
|
|
|
/**
 * Not implemented yet: neither argument is used and no collator is opened.
 *
 * @return NULL
 */
U_CAPI UCollator* U_EXPORT2
ucol_openFromIdentifier(uint32_t identifier,
                        UErrorCode *status)
{
    (void)identifier;
    (void)status;
    return NULL;
}
|
|
|
|
|
|
|
|
/**
 * Not implemented yet: no argument is used and buffer is not written.
 *
 * @return 0
 */
U_CAPI int32_t U_EXPORT2
ucol_identifierToShortString(uint32_t identifier,
                             char *buffer,
                             int32_t capacity,
                             UErrorCode *status)
{
    (void)identifier;
    (void)buffer;
    (void)capacity;
    (void)status;
    return 0;
}
|