// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * Copyright (C) 1996-2015, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * file name: ucol.cpp * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * Modification history * Date Name Comments * 1996-1999 various members of ICU team maintained C API for collation framework * 02/16/2001 synwee Added internal method getPrevSpecialCE * 03/01/2001 synwee Added maxexpansion functionality. * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant * 2012-2014 markus Rewritten in C++ again. */ #include "unicode/utypes.h" #if !UCONFIG_NO_COLLATION #include "unicode/coll.h" #include "unicode/tblcoll.h" #include "unicode/bytestream.h" #include "unicode/coleitr.h" #include "unicode/ucoleitr.h" #include "unicode/ustring.h" #include "cmemory.h" #include "collation.h" #include "cstring.h" #include "putilimp.h" #include "uassert.h" #include "utracimp.h" U_NAMESPACE_USE U_CAPI UCollator* U_EXPORT2 ucol_openBinary(const uint8_t *bin, int32_t length, const UCollator *base, UErrorCode *status) { if(U_FAILURE(*status)) { return NULL; } RuleBasedCollator *coll = new RuleBasedCollator( bin, length, RuleBasedCollator::rbcFromUCollator(base), *status); if(coll == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; return NULL; } if(U_FAILURE(*status)) { delete coll; return NULL; } return coll->toUCollator(); } U_CAPI int32_t U_EXPORT2 ucol_cloneBinary(const UCollator *coll, uint8_t *buffer, int32_t capacity, UErrorCode *status) { if(U_FAILURE(*status)) { return 0; } const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); if(rbc == NULL && coll != NULL) { *status = U_UNSUPPORTED_ERROR; return 0; } return rbc->cloneBinary(buffer, capacity, *status); } U_CAPI UCollator* U_EXPORT2 ucol_safeClone(const UCollator *coll, void * /*stackBuffer*/, int32_t * pBufferSize, UErrorCode *status) { if (status == NULL || U_FAILURE(*status)){ return NULL; } if (coll == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; return NULL; } if (pBufferSize != NULL) { int32_t inputSize = *pBufferSize; *pBufferSize = 1; if (inputSize == 0) { return NULL; // preflighting for deprecated functionality } } Collator *newColl = Collator::fromUCollator(coll)->clone(); if (newColl == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; } else { *status = U_SAFECLONE_ALLOCATED_WARNING; } return newColl->toUCollator(); } U_CAPI void U_EXPORT2 ucol_close(UCollator *coll) { UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE); UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll); if(coll != NULL) { delete Collator::fromUCollator(coll); } UTRACE_EXIT(); } U_CAPI int32_t U_EXPORT2 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, const uint8_t *src2, int32_t src2Length, uint8_t *dest, int32_t destCapacity) { /* check arguments */ if( src1==NULL || src1Length<-1 || src1Length==0 || (src1Length>0 && src1[src1Length-1]!=0) || src2==NULL || src2Length<-1 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) || destCapacity<0 || (destCapacity>0 && dest==NULL) ) { /* error, attempt to write a zero byte and return 0 */ if(dest!=NULL && destCapacity>0) { *dest=0; } return 0; } /* check lengths and capacity */ if(src1Length<0) { src1Length=(int32_t)uprv_strlen((const char *)src1)+1; } if(src2Length<0) { src2Length=(int32_t)uprv_strlen((const char *)src2)+1; } int32_t destLength=src1Length+src2Length; if(destLength>destCapacity) { /* the merged sort key does not fit into the destination */ return destLength; } /* merge the sort keys with the same number of levels */ uint8_t *p=dest; for(;;) { /* copy level from src1 not including 00 or 01 */ uint8_t b; while((b=*src1)>=2) { ++src1; *p++=b; } /* add a 02 merge separator */ *p++=2; /* copy level from src2 not including 00 or 01 */ while((b=*src2)>=2) { ++src2; *p++=b; } /* if both sort keys have another level, then add a 01 level separator and continue */ if(*src1==1 && *src2==1) { ++src1; ++src2; *p++=1; } else { break; } } /* * here, at least one sort key is finished now, but the other one * might have some contents left from containing more levels; * that contents is just appended to the result */ if(*src1!=0) { /* src1 is not finished, therefore *src2==0, and src1 is appended */ src2=src1; } /* append src2, "the other, unfinished sort key" */ while((*p++=*src2++)!=0) {} /* the actual length might be less than destLength if either sort key contained illegally embedded zero bytes */ return (int32_t)(p-dest); } U_CAPI int32_t U_EXPORT2 ucol_getSortKey(const UCollator *coll, const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength) { UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY); if (UTRACE_LEVEL(UTRACE_VERBOSE)) { UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source, ((sourceLength==-1 && source!=NULL) ? u_strlen(source) : sourceLength)); } int32_t keySize = Collator::fromUCollator(coll)-> getSortKey(source, sourceLength, result, resultLength); UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize); UTRACE_EXIT_VALUE(keySize); return keySize; } U_CAPI int32_t U_EXPORT2 ucol_nextSortKeyPart(const UCollator *coll, UCharIterator *iter, uint32_t state[2], uint8_t *dest, int32_t count, UErrorCode *status) { /* error checking */ if(status==NULL || U_FAILURE(*status)) { return 0; } UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART); UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d", coll, iter, state[0], state[1], dest, count); int32_t i = Collator::fromUCollator(coll)-> internalNextSortKeyPart(iter, state, dest, count, *status); // Return number of meaningful sortkey bytes. UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d", dest,i, state[0], state[1]); UTRACE_EXIT_VALUE_STATUS(i, *status); return i; } /** * Produce a bound for a given sortkey and a number of levels. */ U_CAPI int32_t U_EXPORT2 ucol_getBound(const uint8_t *source, int32_t sourceLength, UColBoundMode boundType, uint32_t noOfLevels, uint8_t *result, int32_t resultLength, UErrorCode *status) { // consistency checks if(status == NULL || U_FAILURE(*status)) { return 0; } if(source == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } int32_t sourceIndex = 0; // Scan the string until we skip enough of the key OR reach the end of the key do { sourceIndex++; if(source[sourceIndex] == Collation::LEVEL_SEPARATOR_BYTE) { noOfLevels--; } } while (noOfLevels > 0 && (source[sourceIndex] != 0 || sourceIndex < sourceLength)); if((source[sourceIndex] == 0 || sourceIndex == sourceLength) && noOfLevels > 0) { *status = U_SORT_KEY_TOO_SHORT_WARNING; } // READ ME: this code assumes that the values for boundType // enum will not changes. They are set so that the enum value // corresponds to the number of extra bytes each bound type // needs. if(result != NULL && resultLength >= sourceIndex+boundType) { uprv_memcpy(result, source, sourceIndex); switch(boundType) { // Lower bound just gets terminated. No extra bytes case UCOL_BOUND_LOWER: // = 0 break; // Upper bound needs one extra byte case UCOL_BOUND_UPPER: // = 1 result[sourceIndex++] = 2; break; // Upper long bound needs two extra bytes case UCOL_BOUND_UPPER_LONG: // = 2 result[sourceIndex++] = 0xFF; result[sourceIndex++] = 0xFF; break; default: *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } result[sourceIndex++] = 0; return sourceIndex; } else { return sourceIndex+boundType+1; } } U_CAPI void U_EXPORT2 ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode) { if(U_FAILURE(*pErrorCode)) { return; } Collator::fromUCollator(coll)->setMaxVariable(group, *pErrorCode); } U_CAPI UColReorderCode U_EXPORT2 ucol_getMaxVariable(const UCollator *coll) { return Collator::fromUCollator(coll)->getMaxVariable(); } U_CAPI uint32_t U_EXPORT2 ucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCode *status) { if(U_FAILURE(*status) || coll == NULL) { return 0; } return Collator::fromUCollator(coll)->setVariableTop(varTop, len, *status); } U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status) { if(U_FAILURE(*status) || coll == NULL) { return 0; } return Collator::fromUCollator(coll)->getVariableTop(*status); } U_CAPI void U_EXPORT2 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) { if(U_FAILURE(*status) || coll == NULL) { return; } Collator::fromUCollator(coll)->setVariableTop(varTop, *status); } U_CAPI void U_EXPORT2 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) { if(U_FAILURE(*status) || coll == NULL) { return; } Collator::fromUCollator(coll)->setAttribute(attr, value, *status); } U_CAPI UColAttributeValue U_EXPORT2 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) { if(U_FAILURE(*status) || coll == NULL) { return UCOL_DEFAULT; } return Collator::fromUCollator(coll)->getAttribute(attr, *status); } U_CAPI void U_EXPORT2 ucol_setStrength( UCollator *coll, UCollationStrength strength) { UErrorCode status = U_ZERO_ERROR; ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status); } U_CAPI UCollationStrength U_EXPORT2 ucol_getStrength(const UCollator *coll) { UErrorCode status = U_ZERO_ERROR; return ucol_getAttribute(coll, UCOL_STRENGTH, &status); } U_CAPI int32_t U_EXPORT2 ucol_getReorderCodes(const UCollator *coll, int32_t *dest, int32_t destCapacity, UErrorCode *status) { if (U_FAILURE(*status)) { return 0; } return Collator::fromUCollator(coll)->getReorderCodes(dest, destCapacity, *status); } U_CAPI void U_EXPORT2 ucol_setReorderCodes(UCollator* coll, const int32_t* reorderCodes, int32_t reorderCodesLength, UErrorCode *status) { if (U_FAILURE(*status)) { return; } Collator::fromUCollator(coll)->setReorderCodes(reorderCodes, reorderCodesLength, *status); } U_CAPI int32_t U_EXPORT2 ucol_getEquivalentReorderCodes(int32_t reorderCode, int32_t* dest, int32_t destCapacity, UErrorCode *pErrorCode) { return Collator::getEquivalentReorderCodes(reorderCode, dest, destCapacity, *pErrorCode); } U_CAPI void U_EXPORT2 ucol_getVersion(const UCollator* coll, UVersionInfo versionInfo) { Collator::fromUCollator(coll)->getVersion(versionInfo); } U_CAPI UCollationResult U_EXPORT2 ucol_strcollIter( const UCollator *coll, UCharIterator *sIter, UCharIterator *tIter, UErrorCode *status) { if(!status || U_FAILURE(*status)) { return UCOL_EQUAL; } UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER); UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter); if(sIter == NULL || tIter == NULL || coll == NULL) { *status = U_ILLEGAL_ARGUMENT_ERROR; UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status); return UCOL_EQUAL; } UCollationResult result = Collator::fromUCollator(coll)->compare(*sIter, *tIter, *status); UTRACE_EXIT_VALUE_STATUS(result, *status); return result; } /* */ /* ucol_strcoll Main public API string comparison function */ /* */ U_CAPI UCollationResult U_EXPORT2 ucol_strcoll( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) { UTRACE_ENTRY(UTRACE_UCOL_STRCOLL); if (UTRACE_LEVEL(UTRACE_VERBOSE)) { UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target); UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength); UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength); } UErrorCode status = U_ZERO_ERROR; UCollationResult returnVal = Collator::fromUCollator(coll)-> compare(source, sourceLength, target, targetLength, status); UTRACE_EXIT_VALUE_STATUS(returnVal, status); return returnVal; } U_CAPI UCollationResult U_EXPORT2 ucol_strcollUTF8( const UCollator *coll, const char *source, int32_t sourceLength, const char *target, int32_t targetLength, UErrorCode *status) { UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8); if (UTRACE_LEVEL(UTRACE_VERBOSE)) { UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target); UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vb ", source, sourceLength); UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vb ", target, targetLength); } if (U_FAILURE(*status)) { /* do nothing */ UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status); return UCOL_EQUAL; } UCollationResult returnVal = Collator::fromUCollator(coll)->internalCompareUTF8( source, sourceLength, target, targetLength, *status); UTRACE_EXIT_VALUE_STATUS(returnVal, *status); return returnVal; } /* convenience function for comparing strings */ U_CAPI UBool U_EXPORT2 ucol_greater( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) { return (ucol_strcoll(coll, source, sourceLength, target, targetLength) == UCOL_GREATER); } /* convenience function for comparing strings */ U_CAPI UBool U_EXPORT2 ucol_greaterOrEqual( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) { return (ucol_strcoll(coll, source, sourceLength, target, targetLength) != UCOL_LESS); } /* convenience function for comparing strings */ U_CAPI UBool U_EXPORT2 ucol_equal( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength) { return (ucol_strcoll(coll, source, sourceLength, target, targetLength) == UCOL_EQUAL); } U_CAPI void U_EXPORT2 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) { const Collator *c = Collator::fromUCollator(coll); if(c != NULL) { UVersionInfo v; c->getVersion(v); // Note: This is tied to how the current implementation encodes the UCA version // in the overall getVersion(). // Alternatively, we could load the root collator and get at lower-level data from there. // Either way, it will reflect the input collator's UCA version only // if it is a known implementation. // It would be cleaner to make this a virtual Collator method. info[0] = v[1] >> 3; info[1] = v[1] & 7; info[2] = v[2] >> 6; info[3] = 0; } } U_CAPI const UChar * U_EXPORT2 ucol_getRules(const UCollator *coll, int32_t *length) { const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); // OK to crash if coll==NULL: We do not want to check "this" pointers. if(rbc != NULL || coll == NULL) { const UnicodeString &rules = rbc->getRules(); U_ASSERT(rules.getBuffer()[rules.length()] == 0); *length = rules.length(); return rules.getBuffer(); } static const UChar _NUL = 0; *length = 0; return &_NUL; } U_CAPI int32_t U_EXPORT2 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) { UnicodeString rules; const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); if(rbc != NULL || coll == NULL) { rbc->getRules(delta, rules); } if(buffer != NULL && bufferLen > 0) { UErrorCode errorCode = U_ZERO_ERROR; return rules.extract(buffer, bufferLen, errorCode); } else { return rules.length(); } } U_CAPI const char * U_EXPORT2 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { return ucol_getLocaleByType(coll, type, status); } U_CAPI const char * U_EXPORT2 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { if(U_FAILURE(*status)) { return NULL; } UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE); UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll); const char *result; const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); if(rbc == NULL && coll != NULL) { *status = U_UNSUPPORTED_ERROR; result = NULL; } else { result = rbc->internalGetLocaleID(type, *status); } UTRACE_DATA1(UTRACE_INFO, "result = %s", result); UTRACE_EXIT_STATUS(*status); return result; } U_CAPI USet * U_EXPORT2 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) { if(U_FAILURE(*status)) { return NULL; } UnicodeSet *set = Collator::fromUCollator(coll)->getTailoredSet(*status); if(U_FAILURE(*status)) { delete set; return NULL; } return set->toUSet(); } U_CAPI UBool U_EXPORT2 ucol_equals(const UCollator *source, const UCollator *target) { return source == target || (*Collator::fromUCollator(source)) == (*Collator::fromUCollator(target)); } #endif /* #if !UCONFIG_NO_COLLATION */