/*
 *******************************************************************************
 *
 *   Copyright (C) 2003, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  strprep.cpp
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2003feb1
 *   created by: Ram Viswanadha
 */

#include "unicode/utypes.h"

#if !UCONFIG_NO_IDNA

#include "strprep.h"
#include "utrie.h"
#include "umutex.h"
#include "cmemory.h"
#include "sprpimpl.h"
#include "nameprep.h"
#include "ustr_imp.h"
#include "unicode/unorm.h"
#include "unicode/udata.h"
#include "unicode/ustring.h"

/* Process-wide state filled in by loadData() and reset by ustrprep_cleanup(). */
static const uint16_t* mappingData = NULL;          /* mapping table that follows the serialized trie */
static int32_t indexes[_IDNA_INDEX_TOP]={ 0 };      /* copy of the index array at the start of the data */
static UBool _isDataLoaded = FALSE;                 /* TRUE once the data below has been published */
static UTrie idnTrie={ 0,0,0,0,0,0,0 };             /* per-code-point property trie */
static UDataMemory* idnData=NULL;                   /* handle that keeps the data file open */
static UErrorCode dataErrorCode =U_ZERO_ERROR;      /* outcome of the last load attempt */

/* file definitions */
static const char DATA_NAME[] = "uidna";
static const char DATA_TYPE[] = "icu";

/**
 * Closes the data file (if one is open) and resets the load state so that a
 * later call to loadData() starts from scratch.
 *
 * @return always TRUE
 */
U_CFUNC UBool
ustrprep_cleanup() {
    if(idnData!=NULL) {
        udata_close(idnData);
        idnData=NULL;
    }
    dataErrorCode=U_ZERO_ERROR;
    _isDataLoaded=FALSE;

    return TRUE;
}

U_CDECL_BEGIN

/**
 * Acceptance callback handed to udata_openChoice().
 *
 * Accepts a data item only when its UDataInfo is at least 20 bytes, matches
 * this platform's endianness and charset family, carries the data format
 * bytes "IDNA", has format version 2, and was built with the same trie shift
 * constants this code was compiled with.
 *
 * @param pInfo  header of the candidate data item
 * @return TRUE if the item is usable, FALSE otherwise
 */
static UBool U_CALLCONV
isAcceptable(void * /* context */,
             const char * /* type */,
             const char * /* name */,
             const UDataInfo *pInfo) {
    if(
        pInfo->size>=20 &&
        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
        pInfo->charsetFamily==U_CHARSET_FAMILY &&
        pInfo->dataFormat[0]==0x49 &&   /* dataFormat="IDNA" 0x49, 0x44, 0x4e, 0x41 */
        pInfo->dataFormat[1]==0x44 &&
        pInfo->dataFormat[2]==0x4e &&
        pInfo->dataFormat[3]==0x41 &&
        pInfo->formatVersion[0]==2 &&
        pInfo->formatVersion[2]==UTRIE_SHIFT &&
        pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
    ) {
        return TRUE;
    } else {
        return FALSE;
    }
}

/**
 * Folding-offset callback installed on the trie by loadData().
 *
 * @param data  trie value
 * @return the low 15 bits of data when bit 15 is set, otherwise 0
 */
static int32_t U_CALLCONV
getFoldingOffset(uint32_t data) {
    if(data&0x8000) {
        return (int32_t)(data&0x7fff);
    } else {
        return 0;
    }
}

U_CDECL_END

/**
 * Loads the "uidna" data item and publishes it in the file-level statics.
 *
 * The data is opened and the trie unserialized without holding the mutex;
 * the results are then installed while holding it. If another thread
 * installed its copy first, this thread's copy is closed again.
 *
 * @param errorCode  in/out error code; nothing is attempted if it already
 *                   indicates a failure on entry and the data is not loaded
 * @return TRUE if the data is available, FALSE otherwise
 */
static UBool U_CALLCONV
loadData(UErrorCode &errorCode) {
    /* load Unicode IDNA data from file */
    UBool isCached;

    /* do this because double-checked locking is broken */
    umtx_lock(NULL);
    isCached=_isDataLoaded;
    umtx_unlock(NULL);

    if(isCached) {
        return TRUE;
    }

    /* errorCode is a reference, so only its value needs checking */
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }

    UTrie _idnTrie={ 0,0,0,0,0,0,0 };
    UDataMemory *data;
    const int32_t *p=NULL;
    const uint8_t *pb;

    /* open the data outside the mutex block */
    //TODO: change the path
    data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errorCode);
    dataErrorCode=errorCode;
    if(U_FAILURE(errorCode)) {
        /* do not write _isDataLoaded here: it is only changed under the
         * mutex, and another thread may have completed a load meanwhile */
        return FALSE;
    }

    /* layout: int32_t index array, then the serialized trie, then the mapping table */
    p=(const int32_t *)udata_getMemory(data);
    pb=(const uint8_t *)(p+_IDNA_INDEX_TOP);
    utrie_unserialize(&_idnTrie, pb, p[_IDNA_INDEX_TRIE_SIZE], &errorCode);
    _idnTrie.getFoldingOffset=getFoldingOffset;

    if(U_FAILURE(errorCode)) {
        dataErrorCode=errorCode;
        udata_close(data);
        return FALSE;
    }

    /* in the mutex block, set the data for this process */
    umtx_lock(NULL);
    if(idnData==NULL) {
        idnData=data;
        data=NULL;
        uprv_memcpy(&indexes, p, sizeof(indexes));
        uprv_memcpy(&idnTrie, &_idnTrie, sizeof(UTrie));
    } else {
        /* somebody else won the race: point at the data that was installed */
        p=(const int32_t *)udata_getMemory(idnData);
    }

    /* initialize some variables; done before unlocking so that a thread that
     * observes _isDataLoaded under the mutex also observes mappingData */
    mappingData=(const uint16_t *)((const uint8_t *)(p+_IDNA_INDEX_TOP)+indexes[_IDNA_INDEX_TRIE_SIZE]);
    _isDataLoaded = TRUE;
    umtx_unlock(NULL);

    /* if a different thread set it first, then close the extra data */
    if(data!=NULL) {
        udata_close(data); /* NULL if it was set correctly */
    }

    return TRUE;
}

// *****************************************************************************
// class StringPrep
// *****************************************************************************

U_NAMESPACE_BEGIN

const char StringPrep::fgClassID=0;

/**
 * Makes sure the shared data is loaded.
 *
 * A previously recorded load failure is reported again without retrying.
 *
 * @param status  in/out error code; receives the recorded load error on failure
 * @return TRUE if the data is available, FALSE otherwise
 */
UBool
StringPrep::isDataLoaded(UErrorCode& status){
    if(U_FAILURE(status)){
        return FALSE;
    }
    if(_isDataLoaded==FALSE && U_FAILURE(dataErrorCode)){
        status = dataErrorCode;
        return FALSE;
    }
    loadData(dataErrorCode);
    if(U_FAILURE(dataErrorCode)){
        status = dataErrorCode;
        return FALSE;
    }
    return TRUE;
}

/**
 * Creates a StringPrep object after making sure the shared data is loaded.
 *
 * @param status  in/out error code
 * @return a new instance owned by the caller, or NULL on failure
 */
StringPrep*
StringPrep::createDefaultInstance(UErrorCode& status){
    StringPrep* strprep = new StringPrep();
    if(strprep==NULL){
        /* only reachable if operator new reports failure by returning NULL;
         * without this the caller could get NULL together with a success code */
        if(U_SUCCESS(status)){
            status = U_MEMORY_ALLOCATION_ERROR;
        }
        return NULL;
    }
    if(!isDataLoaded(status)){
        delete strprep;
        return NULL;
    }
    return strprep;
}

/**
 * Creates a NamePrep object after making sure the shared data is loaded.
 *
 * @param status  in/out error code; it is also passed to the NamePrep constructor
 * @return a new instance owned by the caller, or NULL on failure
 */
StringPrep*
StringPrep::createNameprepInstance(UErrorCode& status){
    StringPrep* strprep = new NamePrep(status);
    if(strprep==NULL){
        /* see createDefaultInstance(): never return NULL with a success code */
        if(U_SUCCESS(status)){
            status = U_MEMORY_ALLOCATION_ERROR;
        }
        return NULL;
    }
    if(!isDataLoaded(status)){
        delete strprep;
        return NULL;
    }
    return strprep;
}

/**
 * Base-class answer to "is this code point exempt from prohibition?".
 *
 * @return always FALSE in this implementation
 */
UBool
StringPrep::isNotProhibited(UChar32 /*ch*/){
    return FALSE;
}

/**
 * Looks up ch in the trie and reports whether its value equals
 * UIDNA_UNASSIGNED.
 *
 * NOTE(review): this compares the whole 16-bit trie value, while map() and
 * isLabelSeparator() compare only the low three flag bits - confirm that the
 * remaining bits are always zero for unassigned code points.
 */
UBool
StringPrep::isUnassigned(UChar32 ch){
    uint32_t result;
    UTRIE_GET16(&idnTrie,ch,result);
    return (result == UIDNA_UNASSIGNED);
}

/**
 * Splits a trie value into its three fields.
 *
 * @param result  value read from the trie
 * @param flag    receives bits 0..2
 * @param length  receives bits 3..4
 * @param index   receives everything above bit 4
 */
static inline void getValues(uint32_t result, int8_t& flag,
                             int8_t& length, int32_t& index){
    /* first 3 bits contain the flag */
    flag = (int8_t) (result & 0x07);
    /* next 2 bits contain the length */
    length = (int8_t) ((result>>3) & 0x03);
    /* the remaining upper bits contain the index */
    index = (result>> 5);
}

int32_t
StringPrep::map(const UChar* src, int32_t srcLength,
                UChar* dest, int32_t destCapacity,
                UBool allowUnassigned,
                UParseError* parseError,
                UErrorCode& status ){

    uint32_t result;
    int8_t flag;
    int8_t length;
    int32_t index;
    int32_t destIndex=0;
    int32_t srcIndex=0;

    // check error status
    if(U_FAILURE(status)){
        return 0;
    }

    //check arguments
    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(srcLength == -1){
        srcLength = u_strlen(src);
    }

    /* NOTE(review): the text under review is damaged from here on. The rest of
     * this loop, whatever followed map() (process() below calls normalize(),
     * which presumably lived here), and the opening of the comment that ends
     * just before MAX_STACK_BUFFER_SIZE are missing. The damaged span is kept
     * verbatim below; restore it from a pristine copy of this file. */
    for(;srcIndex through ) are examples of this because they have bidirectional category "EN".

   In any profile that specifies bidirectional character handling, all
   three of the following requirements MUST be met:

   1) The characters in section 5.8 MUST be prohibited.

   2) If a string contains any RandALCat character, the string MUST NOT
      contain any LCat character.

   3) If a string contains any RandALCat character, a RandALCat
      character MUST be the first character of the string, and a
      RandALCat character MUST be the last character of the string.
*/

#define MAX_STACK_BUFFER_SIZE 300

/**
 * Runs the preparation pipeline on src: map(), then normalize(), then a scan
 * of the normalized text that enforces the bidi requirements listed above,
 * and finally copies the result to dest.
 *
 * Intermediate results live in stack buffers of MAX_STACK_BUFFER_SIZE UChars
 * and move to heap buffers when a step reports U_BUFFER_OVERFLOW_ERROR.
 *
 * @param src             input text; must not be NULL
 * @param srcLength       length of src, or -1 (map() then uses u_strlen)
 * @param dest            output buffer; may be NULL only if destCapacity is 0
 * @param destCapacity    capacity of dest in UChars
 * @param allowUnassigned forwarded to map()
 * @param parseError      forwarded to map() and to uprv_syntaxError()
 * @param status          in/out error code
 * @return the value of u_terminateUChars() for the normalized length; 0 when
 *         the arguments are rejected. Callers must check status.
 */
int32_t
StringPrep::process(const UChar* src, int32_t srcLength,
                    UChar* dest, int32_t destCapacity,
                    UBool allowUnassigned,
                    UParseError* parseError,
                    UErrorCode& status ){
    // check error status
    if(U_FAILURE(status)){
        return 0;
    }

    //check arguments
    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
    UChar *b1 = b1Stack, *b2 = b2Stack;
    int32_t b1Len, b2Len=0,
            b1Capacity = MAX_STACK_BUFFER_SIZE ,
            b2Capacity = MAX_STACK_BUFFER_SIZE;
    uint32_t result;
    int32_t b2Index = 0;
    int8_t flag;
    int8_t length;
    int32_t index;
    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    UBool leftToRight=FALSE, rightToLeft=FALSE;
    int32_t rtlPos =-1, ltrPos =-1;

    b1Len = map(src,srcLength, b1, b1Capacity,allowUnassigned, parseError, status);

    if(status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        /* we do not have enough room so grow the buffer*/
        b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
        if(b1==NULL){
            status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        status = U_ZERO_ERROR; // reset error

        b1Len = map(src,srcLength, b1, b1Len,allowUnassigned, parseError, status);
    }

    b2Len = normalize(b1,b1Len, b2,b2Capacity,status);

    if(status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        /* we do not have enough room so grow the buffer*/
        b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
        if(b2==NULL){
            status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        status = U_ZERO_ERROR; // reset error

        /* the source is still the mapped text in b1; the new b2 is
         * uninitialized and must only be used as the destination */
        b2Len = normalize(b1,b1Len, b2,b2Len,status);
    }

    if(U_FAILURE(status)){
        goto CLEANUP;
    }

    UChar32 ch;

    /* NOTE(review): the text under review is damaged here. The body of the
     * scan loop over b2 and the start of the first bidi check are missing;
     * only the tail of that check survives. The damaged span is kept verbatim
     * below; restore it from a pristine copy of this file. */
    for(; b2IndexltrPos) ? rtlPos : ltrPos, b2Len, parseError);
        goto CLEANUP;
    }

    //satisfy 3
    if( rightToLeft == TRUE &&
        !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
          (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
      ){
        status = U_IDNA_CHECK_BIDI_ERROR;
        uprv_syntaxError(b2, rtlPos, b2Len, parseError);
        /* leave through CLEANUP like the other error exits so that heap-grown
         * b1/b2 are released; the failure is reported through status */
        goto CLEANUP;
    }

    if(b2Len <= destCapacity){
        uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
    }

CLEANUP:
    /* free only what was moved off the stack */
    if(b1!=b1Stack){
        uprv_free(b1);
    }
    if(b2!=b2Stack){
        uprv_free(b2);
    }
    return u_terminateUChars(dest, destCapacity, b2Len, &status);
}

/**
 * Reports whether the low three bits of the trie value for ch equal
 * UIDNA_LABEL_SEPARATOR.
 *
 * @param ch      code point to test
 * @param status  in/out error code; set if the data cannot be loaded
 * @return TRUE for a label separator; FALSE otherwise or on error
 */
UBool
StringPrep::isLabelSeparator(UChar32 ch, UErrorCode& status){
    // check error status
    if(U_FAILURE(status)){
        return FALSE;
    }

    if(isDataLoaded(status)){
        int32_t result;
        UTRIE_GET16(&idnTrie,ch, result);
        if( (result & 0x07) == UIDNA_LABEL_SEPARATOR){
            return TRUE;
        }
    }
    return FALSE;
}

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_IDNA */