ICU-6118 Remove the @internal functions ucol_collatorToIdentifier, ucol_openFromIdentifier, ucol_identifierToShortString and ucol_shortStringToIdentifier (not used by original customer and their data is out of date).

Remove some references to unused internal headers.
Make some functions library private.

X-SVN-Rev: 23177
This commit is contained in:
George Rhoten 2008-01-05 01:27:56 +00:00
parent 36eaa60b6c
commit 82b85fb45c
17 changed files with 208 additions and 608 deletions

View File

@ -1,6 +1,6 @@
/*
******************************************************************************
* Copyright (C) 1996-2007, International Business Machines Corporation and *
* Copyright (C) 1996-2008, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*/
@ -49,6 +49,7 @@
#include "umutex.h"
#include "servloc.h"
#include "ustrenum.h"
#include "uresimp.h"
#include "ucln_in.h"
static U_NAMESPACE_QUALIFIER Locale* availableLocaleList = NULL;

View File

@ -17,7 +17,6 @@
*/
#include "unicode/utypes.h"
#include "uassert.h"
#if !UCONFIG_NO_COLLATION
@ -27,7 +26,6 @@
#include "unicode/ustring.h"
#include "ucol_imp.h"
#include "ucol_elm.h"
#include "bocsu.h"
#include "unormimp.h"
@ -38,6 +36,7 @@
#include "cstring.h"
#include "utracimp.h"
#include "putilimp.h"
#include "uassert.h"
#ifdef UCOL_DEBUG
#include <stdio.h>
@ -357,7 +356,7 @@ ucol_initFromBinary(const uint8_t *bin, int32_t length,
}
*/
// We need these and we could be running without UCA
uprv_uca_initImplicitConstants(0, 0, status);
uprv_uca_initImplicitConstants(status);
UCATableHeader *colData = (UCATableHeader *)bin;
// do we want version check here? We're trying to figure out whether collators are compatible
if((base && (uprv_memcmp(colData->UCAVersion, base->image->UCAVersion, sizeof(UVersionInfo)) != 0 ||
@ -1182,7 +1181,7 @@ static void initImplicitConstants(int minPrimary, int maxPrimary,
* Supply parameters for generating implicit CEs
*/
U_CAPI void U_EXPORT2
uprv_uca_initImplicitConstants(int32_t, int32_t, UErrorCode *status) {
uprv_uca_initImplicitConstants(UErrorCode *status) {
// 13 is the largest 4-byte gap we can use without getting 2 four-byte forms.
//initImplicitConstants(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1, status);
initImplicitConstants(minImplicitPrimary, maxImplicitPrimary, 0x04, 0xFE, 1, 1, status);
@ -1251,8 +1250,8 @@ ucol_initUCA(UErrorCode *status) {
ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup);
}
// Initialize variables for implicit generation
const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)_staticUCA->image + _staticUCA->image->UCAConsts);
uprv_uca_initImplicitConstants(UCAconsts->UCA_PRIMARY_IMPLICIT_MIN, UCAconsts->UCA_PRIMARY_IMPLICIT_MAX, status);
//const UCAConstants *UCAconsts = (UCAConstants *)((uint8_t *)_staticUCA->image + _staticUCA->image->UCAConsts);
uprv_uca_initImplicitConstants(status);
//_staticUCA->mapping.getFoldingOffset = _getFoldingOffset;
}else{
udata_close(result);

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2001-2007, International Business Machines
* Copyright (C) 2001-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -14,7 +14,7 @@
* created by: Vladimir Weinstein
*
* This module builds a collator based on the rule set.
*
*
*/
#include "unicode/utypes.h"
@ -22,19 +22,23 @@
#if !UCONFIG_NO_COLLATION
#include "unicode/ucoleitr.h"
#include "unicode/udata.h"
#include "unicode/uchar.h"
#include "ucol_bld.h"
#include "ucln_in.h"
#include "umutex.h"
#include "unicode/uniset.h"
#include "ucol_bld.h"
#include "ucol_elm.h"
#include "ucol_cnt.h"
#include "ucln_in.h"
#include "umutex.h"
#include "unormimp.h"
#include "cmemory.h"
static const InverseUCATableHeader* _staticInvUCA = NULL;
static UDataMemory* invUCA_DATA_MEM = NULL;
U_CDECL_BEGIN
static UBool U_CALLCONV
isAcceptableInvUCA(void * /*context*/,
isAcceptableInvUCA(void * /*context*/,
const char * /*type*/, const char * /*name*/,
const UDataInfo *pInfo)
{
@ -51,26 +55,23 @@ isAcceptableInvUCA(void * /*context*/,
//pInfo->formatVersion[1]==INVUCA_FORMAT_VERSION_1 &&
//pInfo->formatVersion[2]==INVUCA_FORMAT_VERSION_2 &&
//pInfo->formatVersion[3]==INVUCA_FORMAT_VERSION_3 &&
) {
UVersionInfo UCDVersion;
u_getUnicodeVersion(UCDVersion);
if(pInfo->dataVersion[0]==UCDVersion[0] &&
pInfo->dataVersion[1]==UCDVersion[1]) {
//pInfo->dataVersion[1]==invUcaDataInfo.dataVersion[1] &&
//pInfo->dataVersion[2]==invUcaDataInfo.dataVersion[2] &&
//pInfo->dataVersion[3]==invUcaDataInfo.dataVersion[3]) {
return TRUE;
} else {
return FALSE;
}
} else {
return FALSE;
}
)
{
UVersionInfo UCDVersion;
u_getUnicodeVersion(UCDVersion);
return (pInfo->dataVersion[0]==UCDVersion[0] &&
pInfo->dataVersion[1]==UCDVersion[1]);
//pInfo->dataVersion[1]==invUcaDataInfo.dataVersion[1] &&
//pInfo->dataVersion[2]==invUcaDataInfo.dataVersion[2] &&
//pInfo->dataVersion[3]==invUcaDataInfo.dataVersion[3]) {
} else {
return FALSE;
}
}
U_CDECL_END
/*
* Takes two CEs (lead and continuation) and
/*
* Takes two CEs (lead and continuation) and
* compares them as CEs should be compared:
* primary vs. primary, secondary vs. secondary
* tertiary vs. tertiary
@ -92,8 +93,8 @@ static int32_t compareCEs(uint32_t source0, uint32_t source1, uint32_t target0,
if(s1 == t1 && s2 == t2) {
return 0;
}
s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
if(s < t) {
return -1;
} else if(s > t) {
@ -158,13 +159,13 @@ int32_t ucol_inv_findCE(const UColTokenParser *src, uint32_t CE, uint32_t Second
/*
 * Bit masks applied to a collation element (CE) word before two CEs are
 * compared at a given strength.  The table is indexed by strength value
 * (e.g. strengthMask[UCOL_PRIMARY], strengthMask[UCOL_SECONDARY]); each
 * successive entry keeps more low-order bits of the CE: the top 16 bits,
 * the top 24 bits, then all 32 bits.
 * NOTE(review): this span is a rendered diff hunk, so the last two
 * initializers are listed twice (pre- and post-change formatting of the
 * same values).
 */
static const uint32_t strengthMask[UCOL_CE_STRENGTH_LIMIT] = {
0xFFFF0000,
0xFFFFFF00,
0xFFFFFFFF
0xFFFFFF00,
0xFFFFFFFF
};
U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src,
uint32_t CE, uint32_t contCE,
uint32_t *nextCE, uint32_t *nextContCE,
uint32_t CE, uint32_t contCE,
uint32_t *nextCE, uint32_t *nextContCE,
uint32_t strength)
{
uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
@ -183,18 +184,19 @@ U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src,
*nextCE = CE;
*nextContCE = contCE;
while((*nextCE & strengthMask[strength]) == CE
&& (*nextContCE & strengthMask[strength]) == contCE) {
*nextCE = (*(CETable+3*(++iCE)));
*nextContCE = (*(CETable+3*(iCE)+1));
}
while((*nextCE & strengthMask[strength]) == CE
&& (*nextContCE & strengthMask[strength]) == contCE)
{
*nextCE = (*(CETable+3*(++iCE)));
*nextContCE = (*(CETable+3*(iCE)+1));
}
return iCE;
return iCE;
}
U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src,
uint32_t CE, uint32_t contCE,
uint32_t *prevCE, uint32_t *prevContCE,
U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src,
uint32_t CE, uint32_t contCE,
uint32_t *prevCE, uint32_t *prevContCE,
uint32_t strength)
{
uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
@ -213,32 +215,35 @@ U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src,
*prevCE = CE;
*prevContCE = contCE;
while((*prevCE & strengthMask[strength]) == CE
while((*prevCE & strengthMask[strength]) == CE
&& (*prevContCE & strengthMask[strength])== contCE
&& iCE > 0) { /* this condition should prevent falling off the edge of the world */
/* here, we end up in a singularity - zero */
*prevCE = (*(CETable+3*(--iCE)));
*prevContCE = (*(CETable+3*(iCE)+1));
}
&& iCE > 0) /* this condition should prevent falling off the edge of the world */
{
/* here, we end up in a singularity - zero */
*prevCE = (*(CETable+3*(--iCE)));
*prevContCE = (*(CETable+3*(iCE)+1));
}
return iCE;
return iCE;
}
/*
 * Reports the strength level at which the CE pair (CE, contCE) differs from
 * the pair (prevCE, prevContCE).
 *
 * Returns UCOL_IDENTICAL when both words of the two pairs are equal,
 * UCOL_PRIMARY when either word differs under strengthMask[UCOL_PRIMARY],
 * UCOL_SECONDARY when either word differs under strengthMask[UCOL_SECONDARY],
 * and UCOL_TERTIARY for any remaining difference.
 *
 * NOTE(review): this span is a rendered diff hunk; the signature and the two
 * masked comparisons appear twice (old and new formatting of the same code).
 */
U_CFUNC uint32_t U_EXPORT2 ucol_getCEStrengthDifference(uint32_t CE, uint32_t contCE,
uint32_t prevCE, uint32_t prevContCE)
U_CFUNC uint32_t U_EXPORT2 ucol_getCEStrengthDifference(uint32_t CE, uint32_t contCE,
uint32_t prevCE, uint32_t prevContCE)
{
/* exact match on both the lead and the continuation word */
if(prevCE == CE && prevContCE == contCE) {
return UCOL_IDENTICAL;
}
if((prevCE & strengthMask[UCOL_PRIMARY]) != (CE & strengthMask[UCOL_PRIMARY])
|| (prevContCE & strengthMask[UCOL_PRIMARY]) != (contCE & strengthMask[UCOL_PRIMARY])) {
return UCOL_PRIMARY;
}
if((prevCE & strengthMask[UCOL_SECONDARY]) != (CE & strengthMask[UCOL_SECONDARY])
|| (prevContCE & strengthMask[UCOL_SECONDARY]) != (contCE & strengthMask[UCOL_SECONDARY])) {
return UCOL_SECONDARY;
}
return UCOL_TERTIARY;
|| (prevContCE & strengthMask[UCOL_PRIMARY]) != (contCE & strengthMask[UCOL_PRIMARY]))
{
return UCOL_PRIMARY;
}
if((prevCE & strengthMask[UCOL_SECONDARY]) != (CE & strengthMask[UCOL_SECONDARY])
|| (prevContCE & strengthMask[UCOL_SECONDARY]) != (contCE & strengthMask[UCOL_SECONDARY]))
{
return UCOL_SECONDARY;
}
/* the pairs differ, but not under the primary or secondary mask */
return UCOL_TERTIARY;
}
@ -246,7 +251,7 @@ U_CFUNC uint32_t U_EXPORT2 ucol_getCEStrengthDifference(uint32_t CE, uint32_t co
inline int32_t ucol_inv_getPrevious(UColTokenParser *src, UColTokListHeader *lh, uint32_t strength) {
uint32_t CE = lh->baseCE;
uint32_t SecondCE = lh->baseContCE;
uint32_t SecondCE = lh->baseContCE;
uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
uint32_t previousCE, previousContCE;
@ -277,7 +282,7 @@ inline int32_t ucol_inv_getPrevious(UColTokenParser *src, UColTokListHeader *lh,
static
inline int32_t ucol_inv_getNext(UColTokenParser *src, UColTokListHeader *lh, uint32_t strength) {
uint32_t CE = lh->baseCE;
uint32_t SecondCE = lh->baseContCE;
uint32_t SecondCE = lh->baseContCE;
uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
uint32_t nextCE, nextContCE;
@ -295,19 +300,20 @@ inline int32_t ucol_inv_getNext(UColTokenParser *src, UColTokListHeader *lh, uin
nextCE = CE;
nextContCE = SecondCE;
while((nextCE & strengthMask[strength]) == CE
&& (nextContCE & strengthMask[strength]) == SecondCE) {
nextCE = (*(CETable+3*(++iCE)));
nextContCE = (*(CETable+3*(iCE)+1));
}
while((nextCE & strengthMask[strength]) == CE
&& (nextContCE & strengthMask[strength]) == SecondCE)
{
nextCE = (*(CETable+3*(++iCE)));
nextContCE = (*(CETable+3*(iCE)+1));
}
lh->nextCE = nextCE;
lh->nextContCE = nextContCE;
lh->nextCE = nextCE;
lh->nextContCE = nextContCE;
return iCE;
return iCE;
}
U_CFUNC void ucol_inv_getGapPositions(UColTokenParser *src, UColTokListHeader *lh, UErrorCode *status) {
static void ucol_inv_getGapPositions(UColTokenParser *src, UColTokListHeader *lh, UErrorCode *status) {
/* reset all the gaps */
int32_t i = 0;
uint32_t *CETable = (uint32_t *)((uint8_t *)src->invUCA+src->invUCA->table);
@ -333,8 +339,8 @@ U_CFUNC void ucol_inv_getGapPositions(UColTokenParser *src, UColTokListHeader *l
UCAConstants *consts = (UCAConstants *)((uint8_t *)src->UCA->image + src->UCA->image->UCAConsts);
if((lh->baseCE & 0xFF000000)>= (consts->UCA_PRIMARY_IMPLICIT_MIN<<24) && (lh->baseCE & 0xFF000000) <= (consts->UCA_PRIMARY_IMPLICIT_MAX<<24) ) { /* implicits - */
//if(lh->baseCE >= PRIMARY_IMPLICIT_MIN && lh->baseCE < PRIMARY_IMPLICIT_MAX ) { /* implicits - */
if((lh->baseCE & 0xFF000000)>= (consts->UCA_PRIMARY_IMPLICIT_MIN<<24) && (lh->baseCE & 0xFF000000) <= (consts->UCA_PRIMARY_IMPLICIT_MAX<<24) ) { /* implicits - */
//if(lh->baseCE >= PRIMARY_IMPLICIT_MIN && lh->baseCE < PRIMARY_IMPLICIT_MAX ) { /* implicits - */
lh->pos[0] = 0;
t1 = lh->baseCE;
t2 = lh->baseContCE & UCOL_REMOVE_CONTINUATION;
@ -429,14 +435,14 @@ U_CFUNC void ucol_inv_getGapPositions(UColTokenParser *src, UColTokListHeader *l
} \
}
/*
 * Advances the CE generator g to its next weight.
 *
 * When *status holds a success code, g->current is replaced by the value
 * ucol_nextWeight() produces from g->ranges / g->noOfRanges.  When *status
 * already holds a failure, g->current is left as it was.  In both cases the
 * function returns g->current.
 *
 * NOTE(review): this span is a rendered diff hunk; the first signature line
 * is the pre-change (U_CFUNC) form and the second is the post-change
 * (static, file-private) form of the same function.
 */
U_CFUNC uint32_t ucol_getNextGenerated(ucolCEGenerator *g, UErrorCode *status) {
static uint32_t ucol_getNextGenerated(ucolCEGenerator *g, UErrorCode *status) {
if(U_SUCCESS(*status)) {
g->current = ucol_nextWeight(g->ranges, &g->noOfRanges);
}
return g->current;
}
U_CFUNC uint32_t ucol_getSimpleCEGenerator(ucolCEGenerator *g, UColToken *tok, uint32_t strength, UErrorCode *status) {
static uint32_t ucol_getSimpleCEGenerator(ucolCEGenerator *g, UColToken *tok, uint32_t strength, UErrorCode *status) {
/* TODO: rename to enum names */
uint32_t high, low, count=1;
uint32_t maxByte = (strength == UCOL_TERTIARY)?0x3F:0xFF;
@ -453,7 +459,7 @@ U_CFUNC uint32_t ucol_getSimpleCEGenerator(ucolCEGenerator *g, UColToken *tok, u
if(tok->next != NULL && tok->next->strength == strength) {
count = tok->next->toInsert;
}
}
g->noOfRanges = ucol_allocWeights(low, high, count, maxByte, g->ranges);
g->current = UCOL_BYTE_COMMON<<24;
@ -464,7 +470,7 @@ U_CFUNC uint32_t ucol_getSimpleCEGenerator(ucolCEGenerator *g, UColToken *tok, u
return g->current;
}
U_CFUNC uint32_t ucol_getCEGenerator(ucolCEGenerator *g, uint32_t* lows, uint32_t* highs, UColToken *tok, uint32_t fStrength, UErrorCode *status) {
static uint32_t ucol_getCEGenerator(ucolCEGenerator *g, uint32_t* lows, uint32_t* highs, UColToken *tok, uint32_t fStrength, UErrorCode *status) {
uint32_t strength = tok->strength;
uint32_t low = lows[fStrength*3+strength];
uint32_t high = highs[fStrength*3+strength];
@ -499,7 +505,7 @@ U_CFUNC uint32_t ucol_getCEGenerator(ucolCEGenerator *g, uint32_t* lows, uint32_
return 0;
}
}
}
}
if(low == 0) {
low = 0x01000000;
@ -511,14 +517,14 @@ U_CFUNC uint32_t ucol_getCEGenerator(ucolCEGenerator *g, uint32_t* lows, uint32_
}
if(high > (UCOL_COMMON_BOT2<<24) && high < (uint32_t)(UCOL_COMMON_TOP2<<24)) {
high = UCOL_COMMON_TOP2<<24;
}
}
if(low < (UCOL_COMMON_BOT2<<24)) {
g->noOfRanges = ucol_allocWeights(UCOL_BYTE_UNSHIFTED_MIN<<24, high, count, maxByte, g->ranges);
g->current = ucol_nextWeight(g->ranges, &g->noOfRanges);
//g->current = UCOL_COMMON_BOT2<<24;
return g->current;
}
}
}
g->noOfRanges = ucol_allocWeights(low, high, count, maxByte, g->ranges);
if(g->noOfRanges == 0) {
@ -530,7 +536,7 @@ U_CFUNC uint32_t ucol_getCEGenerator(ucolCEGenerator *g, uint32_t* lows, uint32_
static
uint32_t u_toLargeKana(const UChar *source, const uint32_t sourceLen, UChar *resBuf, const uint32_t resLen, UErrorCode *status) {
uint32_t i = 0;
uint32_t i = 0;
UChar c;
if(U_FAILURE(*status)) {
@ -565,7 +571,7 @@ uint32_t u_toLargeKana(const UChar *source, const uint32_t sourceLen, UChar *res
static
uint32_t u_toSmallKana(const UChar *source, const uint32_t sourceLen, UChar *resBuf, const uint32_t resLen, UErrorCode *status) {
uint32_t i = 0;
uint32_t i = 0;
UChar c;
if(U_FAILURE(*status)) {
@ -690,7 +696,7 @@ U_CFUNC void ucol_doCE(UColTokenParser *src, uint32_t *CEparts, UColToken *tok,
// we want to set case bits here and now, not later.
// Case bits handling
// Case bits handling
if(tok->CEs[0] != 0) { // case bits should be set only for non-ignorables
tok->CEs[0] &= 0xFFFFFF3F; // Clean the case bits field
int32_t cSize = (tok->source & 0xFF000000) >> 24;
@ -738,7 +744,7 @@ U_CFUNC void ucol_initBuffers(UColTokenParser *src, UColTokListHeader *lh, UErro
}
tok=tok->previous;
tok->toInsert = t[tok->strength];
}
}
tok->toInsert = t[tok->strength];
ucol_inv_getGapPositions(src, lh, status);
@ -760,7 +766,7 @@ U_CFUNC void ucol_initBuffers(UColTokenParser *src, UColTokListHeader *lh, UErro
tok=lh->first[UCOL_TOK_POLARITY_POSITIVE];
do {
do {
fprintf(stderr,"%i", tok->toInsert);
tok = tok->next;
} while(tok != NULL);
@ -792,7 +798,7 @@ U_CFUNC void ucol_initBuffers(UColTokenParser *src, UColTokListHeader *lh, UErro
CEparts[UCOL_PRIMARY] = lh->gapsLo[fStrength*3];
CEparts[UCOL_SECONDARY] = lh->gapsLo[fStrength*3+1];
/*CEparts[UCOL_TERTIARY] = ucol_getCEGenerator(&Gens[2], lh->gapsLo[fStrength*3+2], lh->gapsHi[fStrength*3+2], tok, UCOL_TERTIARY); */
CEparts[UCOL_TERTIARY] = ucol_getCEGenerator(&Gens[UCOL_TERTIARY], lh->gapsLo, lh->gapsHi, tok, fStrength, status);
CEparts[UCOL_TERTIARY] = ucol_getCEGenerator(&Gens[UCOL_TERTIARY], lh->gapsLo, lh->gapsHi, tok, fStrength, status);
} else if(initStrength == UCOL_SECONDARY) { /* secondaries */
CEparts[UCOL_PRIMARY] = lh->gapsLo[fStrength*3];
/*CEparts[1] = ucol_getCEGenerator(&Gens[1], lh->gapsLo[fStrength*3+1], lh->gapsHi[fStrength*3+1], tok, 1);*/
@ -894,8 +900,8 @@ U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokL
}
/* copy UChars */
// We kept prefix and source kind of together, as it is a kind of a contraction.
// However, now we have to slice the prefix off the main thing -
// We kept prefix and source kind of together, as it is a kind of a contraction.
// However, now we have to slice the prefix off the main thing -
el.prefix = el.prefixChars;
el.cPoints = el.uchars;
if(tok->prefix != 0) { // we will just copy the prefix here, and adjust accordingly in the
@ -904,13 +910,13 @@ U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokL
el.prefixSize = tok->prefix>>24;
uprv_memcpy(el.prefix, src->source + (tok->prefix & 0x00FFFFFF), el.prefixSize*sizeof(UChar));
el.cSize = (tok->source >> 24)-(tok->prefix>>24);
el.cSize = (tok->source >> 24)-(tok->prefix>>24);
uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF)+(tok->prefix>>24) + src->source, el.cSize*sizeof(UChar));
} else {
el.prefixSize = 0;
*el.prefix = 0;
el.cSize = (tok->source >> 24);
el.cSize = (tok->source >> 24);
uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF) + src->source, el.cSize*sizeof(UChar));
}
if(src->UCA != NULL) {
@ -924,7 +930,7 @@ U_CFUNC void ucol_createElements(UColTokenParser *src, tempUCATable *t, UColTokL
int16_t fcd = unorm_getFCD16(fcdTrieData, el.cPoints[i]);
if ( (fcd && 0xff) == 0 ) {
// reset flag when current char is not combining mark.
containCombinMarks = FALSE;
containCombinMarks = FALSE;
}
else {
containCombinMarks = TRUE;
@ -988,9 +994,9 @@ _processUCACompleteIgnorables(const void *context, UChar32 start, UChar32 limit,
}
U_CDECL_END
static void
static void
ucol_uprv_bld_copyRangeFromUCA(UColTokenParser *src, tempUCATable *t,
UChar32 start, UChar32 end,
UChar32 start, UChar32 end,
UErrorCode *status)
{
//UChar decomp[256];
@ -1004,51 +1010,52 @@ ucol_uprv_bld_copyRangeFromUCA(UColTokenParser *src, tempUCATable *t,
if(U_SUCCESS(*status)) {
for(u = start; u<=end; u++) {
if((CE = utrie_get32(t->mapping, u, NULL)) == UCOL_NOT_FOUND
if((CE = utrie_get32(t->mapping, u, NULL)) == UCOL_NOT_FOUND
/* this test is for contractions that are missing the starting element. */
|| ((isCntTableElement(CE)) &&
(uprv_cnttab_getCE(t->contractions, CE, 0, status) == UCOL_NOT_FOUND))
) {
el.cSize = 0;
U16_APPEND_UNSAFE(el.uchars, el.cSize, u);
//decomp[0] = (UChar)u;
//el.uchars[0] = (UChar)u;
el.cPoints = el.uchars;
//el.cSize = 1;
el.noOfCEs = 0;
el.prefix = el.prefixChars;
el.prefixSize = 0;
//uprv_init_collIterate(src->UCA, decomp, 1, &colIt);
// We actually want to check whether this element is a special
// If it is an implicit element (hangul, CJK - we want to copy the
// special, not the resolved CEs) - for hangul, copying resolved
// would just make things the same (there is an expansion and it
// takes approximately the same amount of time to resolve as
// falling back to the UCA).
/*
UTRIE_GET32(src->UCA->mapping, u, CE);
tag = getCETag(CE);
if(tag == HANGUL_SYLLABLE_TAG || tag == CJK_IMPLICIT_TAG
|| tag == IMPLICIT_TAG || tag == TRAIL_SURROGATE_TAG
|| tag == LEAD_SURROGATE_TAG) {
el.CEs[el.noOfCEs++] = CE;
} else {
*/
// It turns out that it does not make sense to keep implicits
// unresolved. The cost of resolving them is big enough so that
// it doesn't make any difference whether we have to go to the UCA
// or not.
{
uprv_init_collIterate(src->UCA, el.uchars, el.cSize, &colIt);
while(CE != UCOL_NO_MORE_CES) {
CE = ucol_getNextCE(src->UCA, &colIt, status);
if(CE != UCOL_NO_MORE_CES) {
el.CEs[el.noOfCEs++] = CE;
}
)
{
el.cSize = 0;
U16_APPEND_UNSAFE(el.uchars, el.cSize, u);
//decomp[0] = (UChar)u;
//el.uchars[0] = (UChar)u;
el.cPoints = el.uchars;
//el.cSize = 1;
el.noOfCEs = 0;
el.prefix = el.prefixChars;
el.prefixSize = 0;
//uprv_init_collIterate(src->UCA, decomp, 1, &colIt);
// We actually want to check whether this element is a special
// If it is an implicit element (hangul, CJK - we want to copy the
// special, not the resolved CEs) - for hangul, copying resolved
// would just make things the same (there is an expansion and it
// takes approximately the same amount of time to resolve as
// falling back to the UCA).
/*
UTRIE_GET32(src->UCA->mapping, u, CE);
tag = getCETag(CE);
if(tag == HANGUL_SYLLABLE_TAG || tag == CJK_IMPLICIT_TAG
|| tag == IMPLICIT_TAG || tag == TRAIL_SURROGATE_TAG
|| tag == LEAD_SURROGATE_TAG) {
el.CEs[el.noOfCEs++] = CE;
} else {
*/
// It turns out that it does not make sense to keep implicits
// unresolved. The cost of resolving them is big enough so that
// it doesn't make any difference whether we have to go to the UCA
// or not.
{
uprv_init_collIterate(src->UCA, el.uchars, el.cSize, &colIt);
while(CE != UCOL_NO_MORE_CES) {
CE = ucol_getNextCE(src->UCA, &colIt, status);
if(CE != UCOL_NO_MORE_CES) {
el.CEs[el.noOfCEs++] = CE;
}
}
uprv_uca_addAnElement(t, &el, status);
}
uprv_uca_addAnElement(t, &el, status);
}
}
}
}
@ -1061,43 +1068,43 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
return NULL;
}
/*
2. Eliminate the negative lists by doing the following for each non-null negative list:
o if previousCE(baseCE, strongestN) != some ListHeader X's baseCE,
create new ListHeader X
o reverse the list, add to the end of X's positive list. Reset the strength of the
first item you add, based on the stronger strength levels of the two lists.
2. Eliminate the negative lists by doing the following for each non-null negative list:
o if previousCE(baseCE, strongestN) != some ListHeader X's baseCE,
create new ListHeader X
o reverse the list, add to the end of X's positive list. Reset the strength of the
first item you add, based on the stronger strength levels of the two lists.
*/
/*
3. For each ListHeader with a non-null positive list:
3. For each ListHeader with a non-null positive list:
*/
/*
o Find all character strings with CEs between the baseCE and the
next/previous CE, at the strength of the first token. Add these to the
tailoring.
? That is, if UCA has ... x <<< X << x' <<< X' < y ..., and the
tailoring has & x < z...
? Then we change the tailoring to & x <<< X << x' <<< X' < z ...
o Find all character strings with CEs between the baseCE and the
next/previous CE, at the strength of the first token. Add these to the
tailoring.
? That is, if UCA has ... x <<< X << x' <<< X' < y ..., and the
tailoring has & x < z...
? Then we change the tailoring to & x <<< X << x' <<< X' < z ...
*/
/* It is possible that this part should be done even while constructing list */
/* The problem is that it is unknown what is going to be the strongest weight */
/* So we might as well do it here */
/*
o Allocate CEs for each token in the list, based on the total number N of the
largest level difference, and the gap G between baseCE and nextCE at that
level. The relation * between the last item and nextCE is the same as the
strongest strength.
o Example: baseCE < a << b <<< q << c < d < e * nextCE(X,1)
? There are 3 primary items: a, d, e. Fit them into the primary gap.
Then fit b and c into the secondary gap between a and d, then fit q
into the tertiary gap between b and c.
o Allocate CEs for each token in the list, based on the total number N of the
largest level difference, and the gap G between baseCE and nextCE at that
level. The relation * between the last item and nextCE is the same as the
strongest strength.
o Example: baseCE < a << b <<< q << c < d < e * nextCE(X,1)
? There are 3 primary items: a, d, e. Fit them into the primary gap.
Then fit b and c into the secondary gap between a and d, then fit q
into the tertiary gap between b and c.
o Example: baseCE << b <<< q << c * nextCE(X,2)
? There are 2 secondary items: b, c. Fit them into the secondary gap.
Then fit q into the tertiary gap between b and c.
o When incrementing primary values, we will not cross high byte
boundaries except where there is only a single-byte primary. That is to
ensure that the script reordering will continue to work.
o Example: baseCE << b <<< q << c * nextCE(X,2)
? There are 2 secondary items: b, c. Fit them into the secondary gap.
Then fit q into the tertiary gap between b and c.
o When incrementing primary values, we will not cross high byte
boundaries except where there is only a single-byte primary. That is to
ensure that the script reordering will continue to work.
*/
UCATableHeader *image = (UCATableHeader *)uprv_malloc(sizeof(UCATableHeader));
/* test for NULL */
@ -1108,7 +1115,7 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
uprv_memcpy(image, src->UCA->image, sizeof(UCATableHeader));
for(i = 0; i<src->resultLen; i++) {
/* now we need to generate the CEs */
/* now we need to generate the CEs */
/* We stuff the initial value in the buffers, and increase the appropriate buffer */
/* According to strength */
if(U_SUCCESS(*status)) {
@ -1132,7 +1139,7 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
src->varTop->listHeader->first = src->varTop->next;
}
if(src->varTop->listHeader->last == src->varTop) { /* first in list */
src->varTop->listHeader->last = src->varTop->previous;
src->varTop->listHeader->last = src->varTop->previous;
}
if(src->varTop->next != NULL) {
src->varTop->next->previous = src->varTop->previous;
@ -1154,7 +1161,7 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
/* now we will go through list once more and resolve expansions, */
/* make UCAElements structs and add them to table */
for(i = 0; i<src->resultLen; i++) {
/* now we need to generate the CEs */
/* now we need to generate the CEs */
/* We stuff the initial value in the buffers, and increase the appropriate buffer */
/* According to strength */
if(U_SUCCESS(*status)) {
@ -1189,7 +1196,7 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
while(*conts != 0) {
/*tailoredCE = ucmpe32_get(t->mapping, *conts);*/
tailoredCE = utrie_get32(t->mapping, *conts, NULL);
if(tailoredCE != UCOL_NOT_FOUND) {
if(tailoredCE != UCOL_NOT_FOUND) {
UBool needToAdd = TRUE;
if(isCntTableElement(tailoredCE)) {
if(uprv_cnttab_isTailored(t->contractions, tailoredCE, conts+1, status) == TRUE) {
@ -1236,9 +1243,9 @@ UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *st
/* still need to produce compatibility closure */
UCATableHeader *myData = uprv_uca_assembleTable(t, status);
UCATableHeader *myData = uprv_uca_assembleTable(t, status);
uprv_uca_closeTempTable(t);
uprv_uca_closeTempTable(t);
uprv_free(image);
return myData;

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2001-2005, International Business Machines
* Copyright (C) 2001-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -28,20 +28,12 @@
#include "unicode/utypes.h"
#if !UCONFIG_NO_COLLATION
#if !UCONFIG_NO_COLLATION_BUILDER
/*#if !UCONFIG_NO_COLLATION_BUILDER*/
#include "ucol_imp.h"
#include "ucol_tok.h"
#include "ucol_elm.h"
#include "ucol_wgt.h"
#include "uhash.h"
#include "cpputils.h"
#include "unicode/ustring.h"
#include "unicode/unistr.h"
#include "unicode/normlzr.h"
U_CFUNC
UCATableHeader *ucol_assembleTailoringTable(UColTokenParser *src, UErrorCode *status);
@ -56,7 +48,10 @@ typedef struct {
uint32_t fHigh; /*forbidden High */
} ucolCEGenerator;
#endif /* #if !UCONFIG_NO_COLLATION_BUILDER */
U_CFUNC uint32_t U_EXPORT2 ucol_getCEStrengthDifference(uint32_t CE, uint32_t contCE,
uint32_t prevCE, uint32_t prevContCE);
/*#endif*/ /* #if !UCONFIG_NO_COLLATION_BUILDER */
#endif /* #if !UCONFIG_NO_COLLATION */
#endif

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2001-2005, International Business Machines
* Copyright (C) 2001-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -25,10 +25,7 @@
#if !UCONFIG_NO_COLLATION
/*#include "ucmpe32.h"*/
#include "utrie.h"
#include "uhash.h"
#include "ucol_elm.h"
#include "ucol_imp.h"
U_CDECL_BEGIN

View File

@ -33,6 +33,7 @@
#include "unicode/normlzr.h"
#include "ucol_elm.h"
#include "ucol_tok.h"
#include "ucol_cnt.h"
#include "unormimp.h"
#include "unicode/caniter.h"
#include "cmemory.h"

View File

@ -26,7 +26,6 @@
#if !UCONFIG_NO_COLLATION
#include "ucol_cnt.h"
#include "ucol_imp.h"
#ifdef UCOL_DEBUG
@ -136,8 +135,6 @@ U_CAPI void U_EXPORT2 uprv_uca_closeTempTable(tempUCATable *t);
U_CAPI uint32_t U_EXPORT2 uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status);
U_CAPI UCATableHeader * U_EXPORT2 uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status);
U_CAPI int32_t U_EXPORT2 uprv_uca_canonicalClosure(tempUCATable *t, UColTokenParser *src, UErrorCode *status);
#define paddedsize(something) ((something)+((((something)%4)!=0)?(4-(something)%4):0))
#define headersize (paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)))
U_CDECL_END

View File

@ -43,9 +43,6 @@
#include "unicode/ucol.h"
#include "utrie.h"
#include "uresimp.h"
#include "unicode/udata.h"
#include "unicode/uiter.h"
/* This is the internal header file which contains important declarations for
* the collation framework.
@ -287,6 +284,9 @@ typedef struct collIterate {
/*int32_t iteratorIndex;*/
} collIterate;
#define paddedsize(something) ((something)+((((something)%4)!=0)?(4-(something)%4):0))
#define headersize (paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)))
/*
struct used internally in getSpecial*CE.
data similar to collIterate.
@ -964,7 +964,7 @@ U_CAPI UBool U_EXPORT2 ucol_isTailored(const UCollator *coll, const UChar u, UEr
U_CAPI const InverseUCATableHeader* U_EXPORT2 ucol_initInverseUCA(UErrorCode *status);
U_CAPI void U_EXPORT2
uprv_uca_initImplicitConstants(int32_t minPrimary, int32_t maxPrimary, UErrorCode *status);
uprv_uca_initImplicitConstants(UErrorCode *status);
U_CAPI uint32_t U_EXPORT2
uprv_uca_getImplicitFromRaw(UChar32 cp);

View File

@ -1,6 +1,6 @@
/*
*******************************************************************************
* Copyright (C) 2004-2006, International Business Machines
* Copyright (C) 2004-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: ucol_sit.cpp
@ -14,6 +14,7 @@
*/
#include "unicode/ustring.h"
#include "unicode/udata.h"
#include "utracimp.h"
#include "ucol_imp.h"
@ -21,6 +22,7 @@
#include "unormimp.h"
#include "cmemory.h"
#include "cstring.h"
#include "uresimp.h"
#if !UCONFIG_NO_COLLATION
@ -30,7 +32,7 @@ enum OptionsList {
UCOL_SIT_REGION,
UCOL_SIT_VARIANT,
UCOL_SIT_KEYWORD,
UCOL_SIT_RFC3166BIS,
UCOL_SIT_BCP47,
UCOL_SIT_STRENGTH,
UCOL_SIT_CASE_LEVEL,
UCOL_SIT_CASE_FIRST,
@ -44,34 +46,6 @@ enum OptionsList {
UCOL_SIT_ITEMS_COUNT
};
/* list of locales for packing of a collator to an integer.
* This list corresponds to ICU 3.0. If more collation bearing
* locales are added in the future, this won't be a simple array
* but a mapping allowing forward and reverse lookup would have to
* be established. Currently, the mapping is from locale name to
* index.
*/
static const char* const locales[] = {
/* 00 - 09 */ "ar", "be", "bg", "ca", "cs", "da", "de", "de__PHONEBOOK", "el", "en",
/* 10 - 19 */ "en_BE", "eo", "es", "es__TRADITIONAL", "et", "fa", "fa_AF", "fi", "fo", "fr",
/* 20 - 29 */ "gu", "he", "hi", "hi__DIRECT", "hr", "hu", "is", "it", "ja", "kk",
/* 30 - 39 */ "kl", "kn", "ko", "lt", "lv", "mk", "mr", "mt", "nb", "nn",
/* 40 - 49 */ "om", "pa", "pl", "ps", "ro", "root", "ru", "sh", "sk", "sl",
/* 50 - 59 */ "sq", "sr", "sv", "ta", "te", "th", "tr", "uk", "vi", "zh",
/* 60 - 64 */ "zh_HK", "zh_MO", "zh_TW", "zh_TW_STROKE", "zh__PINYIN"
};
/* Collation keyword values that can be packed into an identifier. The array
* index is the value stored in the keyword field. Index 0 (the empty string)
* stands for "no collation keyword": the lookup in
* ucol_sit_putLocaleInIdentifier() starts at index 1, and
* ucol_openFromIdentifier() appends a keyword only for a non-zero index.
*/
static const char* const keywords[] = {
/* 00 */ "",
/* 01 */ "direct",
/* 02 */ "phonebook",
/* 03 */ "pinyin",
/* 04 */ "standard",
/* 05 */ "stroke",
/* 06 */ "traditional"
};
/* option starters chars. */
static const char alternateHArg = 'A';
static const char variableTopValArg = 'B';
@ -665,251 +639,6 @@ ucol_normalizeShortDefinitionString(const char *definition,
return ucol_sit_dumpSpecs(&s, destination, capacity, status);
}
// structure for packing the bits of the attributes in the
// identifier number.
// locale is packed separately
struct bitPacking {
// option starter character used for this attribute in short definition strings
char letter;
// position of the field's least significant bit inside the identifier
uint32_t offset;
// width of the field in bits
uint32_t width;
// collator attribute the field encodes
UColAttribute attribute;
// values[n] is the attribute value represented by field value n
UColAttributeValue values[6];
};
// Layout of the attribute fields inside the 32-bit identifier, one entry per
// attribute. Each field stores an index into the entry's values[] array, and
// index 0 is UCOL_DEFAULT for every attribute. The fields listed here occupy
// bits 14 through 30 (offset 14, width 2 up to offset 29, width 2).
// NOTE(review): the strength field is 3 bits wide (field values 0-7) but
// values[] has only 6 slots, so field values 6 and 7 have no matching slot.
static const bitPacking attributesToBits[UCOL_ATTRIBUTE_COUNT] = {
/* french */ { frenchCollArg, 29, 2, UCOL_FRENCH_COLLATION, { UCOL_DEFAULT, UCOL_OFF, UCOL_ON }},
/* alternate */ { alternateHArg, 27, 2, UCOL_ALTERNATE_HANDLING, { UCOL_DEFAULT, UCOL_NON_IGNORABLE, UCOL_SHIFTED }},
/* case first */ { caseFirstArg, 25, 2, UCOL_CASE_FIRST, { UCOL_DEFAULT, UCOL_OFF, UCOL_LOWER_FIRST, UCOL_UPPER_FIRST }},
/* case level */ { caseLevelArg, 23, 2, UCOL_CASE_LEVEL, { UCOL_DEFAULT, UCOL_OFF, UCOL_ON }},
/* normalization */ { normArg, 21, 2, UCOL_NORMALIZATION_MODE, { UCOL_DEFAULT, UCOL_OFF, UCOL_ON }},
/* strength */ { strengthArg, 18, 3, UCOL_STRENGTH, { UCOL_DEFAULT, UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL }},
/* hiragana */ { hiraganaQArg, 16, 2, UCOL_HIRAGANA_QUATERNARY_MODE, { UCOL_DEFAULT, UCOL_OFF, UCOL_ON }},
/* numeric coll */ { numericCollArg, 14, 2, UCOL_NUMERIC_COLLATION, { UCOL_DEFAULT, UCOL_OFF, UCOL_ON }}
};
// Position (least significant bit) and size in bits of the collation keyword
// field of an identifier; the field holds an index into keywords[].
static const uint32_t keywordShift = 9;
static const uint32_t keywordWidth = 5;
// Position (least significant bit) and size in bits of the locale field of an
// identifier; the field holds an index into locales[].
static const uint32_t localeShift = 0;
static const uint32_t localeWidth = 7;
/**
 * Packs the locale index and the collation keyword index of a locale into an
 * identifier.
 *
 * The locale is canonicalized, its base name is looked up in locales[] and
 * its "collation" keyword value is looked up in keywords[]. The two indexes
 * are OR-ed into the locale and keyword fields of the identifier.
 *
 * @param result identifier built so far; its other fields are left untouched
 * @param locale locale ID to encode. If NULL, result is returned unchanged.
 * @param status ICU error code; set by the uloc/ucol calls made here
 * @return result with the locale and keyword fields filled in, or
 *         UCOL_SIT_COLLATOR_NOT_ENCODABLE if status indicates a failure or
 *         the base name is not an entry of locales[]
 */
static uint32_t ucol_sit_putLocaleInIdentifier(uint32_t result, const char* locale, UErrorCode* status) {
    char buffer[internalBufferSize], keywordBuffer[internalBufferSize],
         baseName[internalBufferSize], localeBuffer[internalBufferSize];
    int32_t keywordLen = 0;
    uint32_t i = 0;
    UBool isAvailable = FALSE;

    if(locale == NULL) {
        return result;
    }

    uloc_canonicalize(locale, buffer, internalBufferSize, status);
    // The functional equivalent is not used for the lookup below; the call is
    // kept so that status is reported exactly as before.
    ucol_getFunctionalEquivalent(localeBuffer, internalBufferSize, "collation", buffer, &isAvailable, status);
    keywordLen = uloc_getKeywordValue(buffer, "collation", keywordBuffer, internalBufferSize, status);
    uloc_getBaseName(buffer, baseName, internalBufferSize, status);
    if(U_FAILURE(*status)) {
        // the buffers may not hold valid strings, so do not look at them
        return UCOL_SIT_COLLATOR_NOT_ENCODABLE;
    }

    /* Binary search over the half-open range [low, high). The previous loop
     * assigned low = mid and could leave the loop with mid == 0 without a
     * match, which encoded any name sorting before locales[0] as locales[0].
     */
    uint32_t low = 0;
    uint32_t high = sizeof(locales)/sizeof(locales[0]);
    uint32_t mid = 0;
    UBool found = FALSE;
    while(low < high) {
        mid = low + ((high - low) >> 1);
        int32_t compVal = uprv_strcmp(baseName, locales[mid]);
        if(compVal < 0) {
            high = mid;
        } else if(compVal > 0) {
            low = mid + 1;
        } else {
            found = TRUE;
            break;
        }
    }
    if(!found) {
        return UCOL_SIT_COLLATOR_NOT_ENCODABLE;
    }
    result |= (mid & ((1 << localeWidth) - 1)) << localeShift;

    if(keywordLen) {
        // index 0 is the "no keyword" slot, so the search starts at 1
        // NOTE(review): a keyword value that is not in keywords[] leaves the
        // keyword field at 0, same as before - confirm this is intended.
        for(i = 1; i < sizeof(keywords)/sizeof(keywords[0]); i++) {
            if(uprv_strcmp(keywords[i], keywordBuffer) == 0) {
                result |= (i & ((1 << keywordWidth) - 1)) << keywordShift;
                break;
            }
        }
    }
    return result;
}
/**
 * Encodes a collator's locale and attribute settings as a 31-bit identifier.
 *
 * @param coll   collator to encode
 * @param locale locale to store in the identifier; if NULL, the collator's
 *               valid locale (ULOC_VALID_LOCALE) is used
 * @param status ICU error code
 * @return the identifier, or UCOL_SIT_COLLATOR_NOT_ENCODABLE if coll is NULL,
 *         status indicates a failure, the variable top is not the default,
 *         the locale cannot be encoded, or an attribute has a value that is
 *         not listed in attributesToBits
 */
U_CAPI uint32_t U_EXPORT2
ucol_collatorToIdentifier(const UCollator *coll,
                          const char *locale,
                          UErrorCode *status)
{
    uint32_t result = 0;
    uint32_t i = 0, j = 0;
    UColAttributeValue attrValue = UCOL_DEFAULT;

    if(status == NULL || U_FAILURE(*status) || coll == NULL) {
        return UCOL_SIT_COLLATOR_NOT_ENCODABLE;
    }
    // if variable top is not default, we need to use strings
    if(coll->variableTopValueisDefault != TRUE) {
        return UCOL_SIT_COLLATOR_NOT_ENCODABLE;
    }
    if(locale == NULL) {
        locale = ucol_getLocale(coll, ULOC_VALID_LOCALE, status);
    }
    result = ucol_sit_putLocaleInIdentifier(result, locale, status);
    if((result & UCOL_SIT_COLLATOR_NOT_ENCODABLE) != 0) {
        // do not mix attribute bits into the "not encodable" marker
        return UCOL_SIT_COLLATOR_NOT_ENCODABLE;
    }

    for(i = 0; i < sizeof(attributesToBits)/sizeof(attributesToBits[0]); i++) {
        const uint32_t valueCount =
            sizeof(attributesToBits[i].values)/sizeof(attributesToBits[i].values[0]);
        attrValue = ucol_getAttributeOrDefault(coll, attributesToBits[i].attribute, status);
        // bounded search: the old loop ran past values[] when nothing matched
        for(j = 0; j < valueCount; j++) {
            if(attributesToBits[i].values[j] == attrValue) {
                break;
            }
        }
        if(j == valueCount) {
            return UCOL_SIT_COLLATOR_NOT_ENCODABLE;
        }
        result |= (j & ((1 << attributesToBits[i].width) - 1)) << attributesToBits[i].offset;
    }
    return result;
}
/**
 * Opens a collator described by an identifier.
 *
 * The locale and keyword fields select entries of locales[] and keywords[];
 * the attribute fields select entries of attributesToBits[].values. The
 * variable top is not part of an identifier and is never set here.
 *
 * @param identifier    identifier to decode
 * @param forceDefaults if TRUE every non-default attribute value in the
 *                      identifier is set on the collator; if FALSE only the
 *                      values that differ from what the opened collator
 *                      already reports are set
 * @param status        ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR when
 *                      the identifier carries the not-encodable bit or a
 *                      field value that has no table entry
 * @return the collator, or NULL if the identifier is invalid or status
 *         indicated a failure on entry; if ucol_open fails its result is
 *         returned as is
 */
U_CAPI UCollator* U_EXPORT2
ucol_openFromIdentifier(uint32_t identifier,
                        UBool forceDefaults,
                        UErrorCode *status)
{
    uint32_t i = 0;
    uint32_t value = 0;
    char locale[internalBufferSize];

    if(status == NULL || U_FAILURE(*status)) {
        return NULL;
    }

    const uint32_t localeIndex = (identifier >> localeShift) & ((1 << localeWidth) - 1);
    const uint32_t keywordIndex = (identifier >> keywordShift) & ((1 << keywordWidth) - 1);

    // The fields are wider than the tables they index, so every index has to
    // be range checked before it is used.
    if((identifier & UCOL_SIT_COLLATOR_NOT_ENCODABLE) != 0 ||
        localeIndex >= sizeof(locales)/sizeof(locales[0]) ||
        keywordIndex >= sizeof(keywords)/sizeof(keywords[0])) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    // Validate the attribute fields up front so that nothing has to be closed
    // when one of them turns out to be invalid.
    for(i = 0; i < sizeof(attributesToBits)/sizeof(attributesToBits[0]); i++) {
        value = (identifier >> attributesToBits[i].offset) & ((1 << attributesToBits[i].width) - 1);
        if(value >= sizeof(attributesToBits[i].values)/sizeof(attributesToBits[i].values[0])) {
            *status = U_ILLEGAL_ARGUMENT_ERROR;
            return NULL;
        }
    }

    uprv_strcpy(locale, locales[localeIndex]);
    if(keywordIndex) {
        uprv_strcat(locale, collationKeyword);
        uprv_strcat(locale, keywords[keywordIndex]);
    }

    UColAttributeValue attrValue = UCOL_DEFAULT;
    UCollator *result = ucol_open(locale, status);
    if(result == NULL || U_FAILURE(*status)) {
        return result;
    }

    // variable top is not set in the identifier, so we can easily skip that on
    for(i = 0; i < sizeof(attributesToBits)/sizeof(attributesToBits[0]); i++) {
        value = (identifier >> attributesToBits[i].offset) & ((1 << attributesToBits[i].width) - 1);
        attrValue = attributesToBits[i].values[value];
        // the collator is all default, so we will set only the values that will differ from
        // the default values.
        if(attrValue != UCOL_DEFAULT) {
            if(forceDefaults ||
                ucol_getAttribute(result, attributesToBits[i].attribute, status) != attrValue) {
                ucol_setAttribute(result, attributesToBits[i].attribute, attrValue, status);
            }
        }
    }
    return result;
}
/**
 * Produces the short definition string for an identifier.
 *
 * The identifier is turned into a collator with ucol_openFromIdentifier() and
 * the string is obtained from ucol_getShortDefinitionString() using the
 * locale named by the identifier's locale and keyword fields.
 *
 * @param identifier    identifier to convert
 * @param buffer        destination for the short definition string
 * @param capacity      capacity of buffer
 * @param forceDefaults passed through to ucol_openFromIdentifier()
 * @param status        ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR when
 *                      the locale or keyword field has no table entry
 * @return the value returned by ucol_getShortDefinitionString(), or 0 if the
 *         identifier is invalid or the collator could not be opened
 */
U_CAPI int32_t U_EXPORT2
ucol_identifierToShortString(uint32_t identifier,
                             char *buffer,
                             int32_t capacity,
                             UBool forceDefaults,
                             UErrorCode *status)
{
    if(status == NULL || U_FAILURE(*status)) {
        return 0;
    }

    const uint32_t locIndex = (identifier >> localeShift) & ((1 << localeWidth) - 1);
    const uint32_t keywordIndex = (identifier >> keywordShift) & ((1 << keywordWidth) - 1);

    // Both fields are wider than the tables they index; reject values that
    // would read past the end of locales[] or keywords[].
    if(locIndex >= sizeof(locales)/sizeof(locales[0]) ||
        keywordIndex >= sizeof(keywords)/sizeof(keywords[0])) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    CollatorSpec s;
    ucol_sit_initCollatorSpecs(&s);
    uprv_strcpy(s.locale, locales[locIndex]);
    if(keywordIndex) {
        uprv_strcat(s.locale, collationKeyword);
        uprv_strcat(s.locale, keywords[keywordIndex]);
    }

    // Going from the identifier to the string without a collator is not
    // implemented, so a collator is opened even when forceDefaults is TRUE.
    UCollator *coll = ucol_openFromIdentifier(identifier, forceDefaults, status);
    if(coll == NULL || U_FAILURE(*status)) {
        if(coll != NULL) {
            ucol_close(coll);
        }
        return 0;
    }
    int32_t resultLen = ucol_getShortDefinitionString(coll, s.locale, buffer, capacity, status);
    ucol_close(coll);
    return resultLen;
}
/**
 * Computes the identifier for a short definition string.
 *
 * The string is parsed into a CollatorSpec and the functional equivalent of
 * its locale for "collation" is used as the locale to encode.
 *
 * @param definition    short definition string
 * @param forceDefaults if FALSE a collator is opened from the string and
 *                      encoded with ucol_collatorToIdentifier(); if TRUE the
 *                      identifier is built from the parsed options directly
 * @param status        ICU error code
 * @return the identifier, or UCOL_SIT_COLLATOR_NOT_ENCODABLE if status
 *         indicates a failure, the collator cannot be opened, the locale
 *         cannot be encoded, or an option value is not listed in
 *         attributesToBits
 */
U_CAPI uint32_t U_EXPORT2
ucol_shortStringToIdentifier(const char *definition,
                             UBool forceDefaults,
                             UErrorCode *status)
{
    UParseError parseError;
    CollatorSpec s;
    uint32_t result = 0;
    uint32_t i = 0, j = 0;
    char locBuffer[internalBufferSize];
    UBool isAvailable = FALSE;
    UColAttributeValue attrValue = UCOL_DEFAULT;

    if(status == NULL || U_FAILURE(*status)) {
        return UCOL_SIT_COLLATOR_NOT_ENCODABLE;
    }

    ucol_sit_initCollatorSpecs(&s);
    ucol_sit_readSpecs(&s, definition, &parseError, status);
    ucol_sit_calculateWholeLocale(&s);
    ucol_getFunctionalEquivalent(locBuffer, internalBufferSize, "collation", s.locale, &isAvailable, status);
    if(U_FAILURE(*status)) {
        // locBuffer may not hold a valid locale, so it must not be encoded
        return UCOL_SIT_COLLATOR_NOT_ENCODABLE;
    }

    if(forceDefaults == FALSE) {
        UCollator *coll = ucol_openFromShortString(definition, FALSE, &parseError, status);
        if(coll == NULL || U_FAILURE(*status)) {
            if(coll != NULL) {
                ucol_close(coll);
            }
            return UCOL_SIT_COLLATOR_NOT_ENCODABLE;
        }
        result = ucol_collatorToIdentifier(coll, locBuffer, status);
        ucol_close(coll);
    } else { // forceDefaults == TRUE
        result = ucol_sit_putLocaleInIdentifier(result, locBuffer, status);
        if((result & UCOL_SIT_COLLATOR_NOT_ENCODABLE) != 0) {
            return UCOL_SIT_COLLATOR_NOT_ENCODABLE;
        }
        for(i = 0; i < sizeof(attributesToBits)/sizeof(attributesToBits[0]); i++) {
            const uint32_t valueCount =
                sizeof(attributesToBits[i].values)/sizeof(attributesToBits[i].values[0]);
            // NOTE(review): s.options is indexed by table position here, which
            // assumes attributesToBits is ordered like s.options - confirm.
            attrValue = s.options[i];
            // bounded search: the old loop ran past values[] when nothing matched
            for(j = 0; j < valueCount; j++) {
                if(attributesToBits[i].values[j] == attrValue) {
                    break;
                }
            }
            if(j == valueCount) {
                return UCOL_SIT_COLLATOR_NOT_ENCODABLE;
            }
            result |= (j & ((1 << attributesToBits[i].width) - 1)) << attributesToBits[i].offset;
        }
    }
    return result;
}
U_CAPI UColAttributeValue U_EXPORT2
ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode *status)
{

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2001-2007, International Business Machines
* Copyright (C) 2001-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -27,6 +27,7 @@
#include "unicode/uniset.h"
#include "ucol_tok.h"
#include "ucol_bld.h"
#include "cmemory.h"
#include "util.h"

View File

@ -175,9 +175,6 @@ U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src,
uint32_t *prevCE, uint32_t *prevContCE,
uint32_t strength);
U_CFUNC uint32_t U_EXPORT2 ucol_getCEStrengthDifference(uint32_t CE, uint32_t contCE,
uint32_t prevCE, uint32_t prevContCE);
#endif /* #if !UCONFIG_NO_COLLATION */

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 1999-2001, International Business Machines
* Copyright (C) 1999-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -21,8 +21,6 @@
#if !UCONFIG_NO_COLLATION
#include "ucol_imp.h"
/* definitions for CE weights */
typedef struct WeightRange {

View File

@ -1012,99 +1012,6 @@ ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode
U_STABLE USet * U_EXPORT2
ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
#ifndef U_HIDE_INTERNAL_API
/**
* Returned by ucol_collatorToIdentifier to signify that collator is
* not encodable as an identifier.
* @internal ICU 3.0
*/
#define UCOL_SIT_COLLATOR_NOT_ENCODABLE 0x80000000
#endif /* U_HIDE_INTERNAL_API */
/**
* Get a 31-bit identifier given a collator.
* @param coll UCollator
* @param locale a locale that will appear as the collator's locale in the resulting
* short string definition. If NULL, the locale will be harvested
* from the collator.
* @param status holds error messages
* @return 31-bit identifier. MSB is used if the collator cannot be encoded. In that
* case UCOL_SIT_COLLATOR_NOT_ENCODABLE is returned
* @see ucol_openFromIdentifier
* @see ucol_identifierToShortString
* @internal ICU 3.0
*/
U_INTERNAL uint32_t U_EXPORT2
ucol_collatorToIdentifier(const UCollator *coll,
const char *locale,
UErrorCode *status);
/**
* Open a collator given a 31-bit identifier
* @param identifier 31-bit identifier, encoded by calling ucol_collatorToIdentifier
* @param forceDefaults if FALSE, the settings that are the same as the collator
* default settings will not be applied (for example, setting
* French secondary on a French collator would not be executed).
* If TRUE, all the settings will be applied regardless of the
* collator default value. If the definition
* strings that can be produced from a collator instantiated by
* calling this API are to be cached, should be set to FALSE.
* @param status for returning errors
* @return UCollator object
* @see ucol_collatorToIdentifier
* @see ucol_identifierToShortString
* @internal ICU 3.0
*/
U_INTERNAL UCollator* U_EXPORT2
ucol_openFromIdentifier(uint32_t identifier,
UBool forceDefaults,
UErrorCode *status);
/**
* Calculate the short definition string given an identifier. Supports preflighting.
* @param identifier 31-bit identifier, encoded by calling ucol_collatorToIdentifier
* @param buffer buffer to store the result
* @param capacity buffer capacity
* @param forceDefaults whether the settings that are the same as the default setting
* should be forced anyway. Setting this argument to FALSE reduces
* the number of different configurations, but decreases performance
* as a collator has to be instantiated.
* @param status for returning errors
* @return length of the short definition string
* @see ucol_collatorToIdentifier
* @see ucol_openFromIdentifier
* @see ucol_shortStringToIdentifier
* @internal ICU 3.0
*/
U_INTERNAL int32_t U_EXPORT2
ucol_identifierToShortString(uint32_t identifier,
char *buffer,
int32_t capacity,
UBool forceDefaults,
UErrorCode *status);
/**
* Calculate the identifier given a short definition string. Supports preflighting.
* @param definition short string definition
* @param forceDefaults whether the settings that are the same as the default setting
* should be forced anyway. Setting this argument to FALSE reduces
* the number of different configurations, but decreases performance
* as a collator has to be instantiated.
* @param status for returning errors
* @return identifier
* @see ucol_collatorToIdentifier
* @see ucol_openFromIdentifier
* @see ucol_identifierToShortString
* @internal ICU 3.0
*/
U_INTERNAL uint32_t U_EXPORT2
ucol_shortStringToIdentifier(const char *definition,
UBool forceDefaults,
UErrorCode *status);
/**
* Universal attribute getter that returns UCOL_DEFAULT if the value is default
* @param coll collator which attributes are to be changed

View File

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2007, International Business Machines Corporation and
* Copyright (c) 1997-2008, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/*******************************************************************************
@ -38,6 +38,8 @@
#include "unicode/ucol.h"
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "unicode/udata.h"
#include "unicode/ucoleitr.h"
#include "unicode/ustring.h"
#include "unicode/uclean.h"
@ -188,19 +190,17 @@ static char* U_EXPORT2 sortKeyToString(const UCollator *coll, const uint8_t *sor
/*
 * Registers the collation tests of this file in the test tree rooted at
 * *root, each under a "tscoll/callcoll/<name>" path.
 * NOTE(review): TestJB581, TestVariableTop, TestJB1401 and TestJitterbug1098
 * are each registered twice below - confirm whether the repeated
 * registrations are intended.
 */
void addAllCollTest(TestNode** root)
{
addTest(root, &TestPrimary, "tscoll/callcoll/TestPrimary");
addTest(root, &TestSecondary, "tscoll/callcoll/TestSecondary");
addTest(root, &TestTertiary, "tscoll/callcoll/TestTertiary");
addTest(root, &TestIdentical, "tscoll/callcoll/TestIdentical");
addTest(root, &TestExtra, "tscoll/callcoll/TestExtra");
addTest(root, &TestJB581, "tscoll/callcoll/TestJB581");
addTest(root, &TestVariableTop, "tscoll/callcoll/TestVariableTop");
addTest(root, &TestJB581, "tscoll/callcoll/TestJB581");
addTest(root, &TestVariableTop, "tscoll/callcoll/TestVariableTop");
addTest(root, &TestSurrogates, "tscoll/callcoll/TestSurrogates");
addTest(root, &TestInvalidRules, "tscoll/callcoll/TestInvalidRules");
addTest(root, &TestJB1401, "tscoll/callcoll/TestJB1401");
addTest(root, &TestJitterbug1098, "tscoll/callcoll/TestJitterbug1098");
addTest(root, &TestJB1401, "tscoll/callcoll/TestJB1401");
addTest(root, &TestJitterbug1098, "tscoll/callcoll/TestJitterbug1098");
addTest(root, &TestFCDCrash, "tscoll/callcoll/TestFCDCrash");
addTest(root, &TestJ5298, "tscoll/callcoll/TestJ5298");
}

View File

@ -1,16 +1,16 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 1997-2007, International Business Machines Corporation and
* Copyright (c) 1997-2008, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/********************************************************************************
/*****************************************************************************
*
* File CAPITEST.C
*
* Modification History:
* Name Description
* Madhu Katragadda Ported for C API
*********************************************************************************
******************************************************************************
*//* C API TEST For COLLATOR */
#include "unicode/utypes.h"
@ -1852,11 +1852,10 @@ static void TestShortString(void)
};
int32_t i = 0, j = 0;
UCollator *coll = NULL, *fromID = NULL, *fromNormalized = NULL;
UCollator *coll = NULL, *fromNormalized = NULL;
UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
char fromShortBuffer[256], fromIDBuffer[256], fromIDRoundtrip[256], normalizedBuffer[256], fromNormalizedBuffer[256];
uint32_t identifier = 0, idFromSS = 0;
char fromShortBuffer[256], normalizedBuffer[256], fromNormalizedBuffer[256];
const char* locale = NULL;
@ -1897,35 +1896,6 @@ static void TestShortString(void)
testCases[i].input, normalizedBuffer);
}
/* test identifiers */
identifier = ucol_collatorToIdentifier(coll, locale, &status);
if(identifier < UCOL_SIT_COLLATOR_NOT_ENCODABLE) {
ucol_identifierToShortString(identifier, fromIDBuffer, 256, FALSE, &status);
fromID = ucol_openFromIdentifier(identifier, FALSE, &status);
if(!ucol_equals(coll, fromID)) {
log_err("Collator from short string ('%s') differs from one obtained through an identifier ('%s')\n",
testCases[i].input, fromIDBuffer);
}
ucol_close(fromID);
}
/* round-trip short string - identifier */
for(j = 1; j < 2; j++) {
idFromSS = ucol_shortStringToIdentifier(testCases[i].input, (UBool)j, &status);
ucol_identifierToShortString(idFromSS, fromIDBuffer, 256, (UBool)j, &status);
identifier = ucol_shortStringToIdentifier(fromIDBuffer, (UBool)j, &status);
ucol_identifierToShortString(identifier, fromIDRoundtrip, 256, (UBool)j, &status);
if(idFromSS != identifier) {
log_err("FD = %i, id didn't round trip. %08X vs %08X (%s)\n",
j, idFromSS, identifier, testCases[i].input);
}
if(strcmp(fromIDBuffer, fromIDRoundtrip)) {
log_err("FD = %i, SS didn't round trip. %s vs %s (%s)\n",
j, fromIDBuffer, fromIDRoundtrip, testCases[i].input);
}
}
ucol_close(fromNormalized);
ucol_close(coll);

View File

@ -1,6 +1,6 @@
/********************************************************************
* COPYRIGHT:
* Copyright (c) 2001-2007, International Business Machines Corporation and
* Copyright (c) 2001-2008, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
/*******************************************************************************
@ -34,6 +34,7 @@
#include "uassert.h"
#include "unicode/parseerr.h"
#include "unicode/ucnv.h"
#include "unicode/ures.h"
#include "uparse.h"
#define LEN(a) (sizeof(a)/sizeof(a[0]))

View File

@ -1,7 +1,7 @@
/*
*******************************************************************************
*
* Copyright (C) 2000-2007, International Business Machines
* Copyright (C) 2000-2008, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@ -984,7 +984,7 @@ struct {
/* produce canonical closure for table */
/* first set up constants for implicit calculation */
uprv_uca_initImplicitConstants(consts.UCA_PRIMARY_IMPLICIT_MIN, consts.UCA_PRIMARY_IMPLICIT_MAX, status);
uprv_uca_initImplicitConstants(status);
/* do the closure */
int32_t noOfClosures = uprv_uca_canonicalClosure(t, NULL, status);
if(noOfClosures != 0) {