2003-07-24 23:23:19 +00:00
/*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*
2014-09-11 05:25:13 +00:00
* Copyright ( C ) 2003 - 2014 , International Business Machines
2003-07-24 23:23:19 +00:00
* Corporation and others . All Rights Reserved .
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* file name : usprep . cpp
* encoding : US - ASCII
* tab size : 8 ( not used )
* indentation : 4
*
* created on : 2003 jul2
* created by : Ram Viswanadha
*/
# include "unicode/utypes.h"
# if !UCONFIG_NO_IDNA
# include "unicode/usprep.h"
2014-09-11 15:34:38 +00:00
# include "unicode/normalizer2.h"
2003-07-24 23:23:19 +00:00
# include "unicode/ustring.h"
# include "unicode/uchar.h"
# include "unicode/uversion.h"
# include "umutex.h"
# include "cmemory.h"
# include "sprpimpl.h"
# include "ustr_imp.h"
# include "uhash.h"
# include "cstring.h"
2003-09-22 22:51:37 +00:00
# include "udataswp.h"
2004-10-06 23:10:53 +00:00
# include "ucln_cmn.h"
2004-12-31 13:41:56 +00:00
# include "ubidi_props.h"
2014-09-11 15:34:38 +00:00
# include "uprops.h"
2003-07-24 23:23:19 +00:00
2010-01-21 23:28:30 +00:00
U_NAMESPACE_USE
2003-07-24 23:23:19 +00:00
U_CDECL_BEGIN
/*
 * Static cache for already opened StringPrep profiles.
 * Keys are UStringPrepKey (name + path) and values are UStringPrepProfile;
 * the table is created lazily by createCache() and torn down by
 * usprep_cleanup().
 */
static UHashtable * SHARED_DATA_HASHTABLE = NULL ;
2013-09-10 00:34:12 +00:00
/* guards the one-time creation of SHARED_DATA_HASHTABLE; reset in usprep_cleanup() */
static icu : : UInitOnce gSharedDataInitOnce ;
2003-07-24 23:23:19 +00:00
2012-10-05 21:22:02 +00:00
/* held around cache lookups/insertions/removals and profile refCount updates */
static UMutex usprepMutex = U_MUTEX_INITIALIZER ;
2003-07-24 23:23:19 +00:00
2004-04-06 01:51:37 +00:00
/* format version of spp file (currently not recorded; declaration kept for reference) */
2012-08-10 16:41:38 +00:00
//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
2004-04-06 01:51:37 +00:00
/*
 * the Unicode version of the sprep data; written by isSPrepAcceptable()
 * when a data file is accepted and read back by loadData()
 */
static UVersionInfo dataVersion = { 0 , 0 , 0 , 0 } ;
2003-07-24 23:23:19 +00:00
2009-02-02 16:16:07 +00:00
/*
 * Profile names must be aligned to UStringPrepProfileType:
 * usprep_openByType() casts the enum value to an index into this table and
 * passes the selected name to usprep_open().
 * NOTE(review): the literals below carry leading/trailing spaces in this
 * view of the file -- confirm they match the actual data item names.
 */
2012-08-10 16:41:38 +00:00
static const char * const PROFILE_NAMES [ ] = {
2009-02-02 16:16:07 +00:00
" rfc3491 " , /* USPREP_RFC3491_NAMEPREP */
" rfc3530cs " , /* USPREP_RFC3530_NFS4_CS_PREP */
" rfc3530csci " , /* USPREP_RFC3530_NFS4_CS_PREP_CI */
" rfc3491 " , /* USPREP_RFC3530_NFS4_CIS_PREP */
" rfc3530mixp " , /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
" rfc3491 " , /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX */
" rfc3722 " , /* USPREP_RFC3722_ISCSI */
" rfc3920node " , /* USPREP_RFC3920_NODEPREP */
" rfc3920res " , /* USPREP_RFC3920_RESOURCEPREP */
" rfc4011 " , /* USPREP_RFC4011_MIB */
" rfc4013 " , /* USPREP_RFC4013_SASLPREP */
" rfc4505 " , /* USPREP_RFC4505_TRACE */
" rfc4518 " , /* USPREP_RFC4518_LDAP */
" rfc4518ci " , /* USPREP_RFC4518_LDAP_CI */
} ;
2003-07-24 23:23:19 +00:00
/**
 * Acceptance callback handed to udata_openChoice() by loadData().
 *
 * Accepts a data item only if its header is at least 20 bytes, matches this
 * platform's endianness and charset family, carries the "SPRP" data format
 * tag, and has format version 3 with the expected UTrie shift values.
 * On acceptance the item's data version is copied into the file-level
 * dataVersion.
 *
 * @param pInfo header of the candidate data item
 * @return TRUE if the item is usable, FALSE otherwise
 */
static UBool U_CALLCONV
isSPrepAcceptable(void * /* context */,
                  const char * /* type */,
                  const char * /* name */,
                  const UDataInfo *pInfo) {
    /* header size and platform properties */
    if (pInfo->size < 20 ||
        pInfo->isBigEndian != U_IS_BIG_ENDIAN ||
        pInfo->charsetFamily != U_CHARSET_FAMILY) {
        return FALSE;
    }

    /* dataFormat="SPRP" */
    if (pInfo->dataFormat[0] != 0x53 ||
        pInfo->dataFormat[1] != 0x50 ||
        pInfo->dataFormat[2] != 0x52 ||
        pInfo->dataFormat[3] != 0x50) {
        return FALSE;
    }

    /* format version and the trie parameters encoded in it */
    if (pInfo->formatVersion[0] != 3 ||
        pInfo->formatVersion[2] != UTRIE_SHIFT ||
        pInfo->formatVersion[3] != UTRIE_INDEX_SHIFT) {
        return FALSE;
    }

    /* remember the Unicode version of the accepted data */
    uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
    return TRUE;
}
/**
 * Folding-offset callback installed on the StringPrep trie by loadData().
 * The trie data word is returned unchanged, reinterpreted as int32_t.
 */
static int32_t U_CALLCONV
getSPrepFoldingOffset(uint32_t data) {
    const int32_t foldingOffset = (int32_t)data;
    return foldingOffset;
}
/* hashes an entry */
2005-07-07 17:50:37 +00:00
static int32_t U_CALLCONV
2003-07-24 23:23:19 +00:00
hashEntry ( const UHashTok parm ) {
UStringPrepKey * b = ( UStringPrepKey * ) parm . pointer ;
UHashTok namekey , pathkey ;
namekey . pointer = b - > name ;
pathkey . pointer = b - > path ;
return uhash_hashChars ( namekey ) + 37 * uhash_hashChars ( pathkey ) ;
}
/* compares two entries */
2005-07-07 17:50:37 +00:00
static UBool U_CALLCONV
2003-07-24 23:23:19 +00:00
compareEntries ( const UHashTok p1 , const UHashTok p2 ) {
UStringPrepKey * b1 = ( UStringPrepKey * ) p1 . pointer ;
UStringPrepKey * b2 = ( UStringPrepKey * ) p2 . pointer ;
UHashTok name1 , name2 , path1 , path2 ;
name1 . pointer = b1 - > name ;
name2 . pointer = b2 - > name ;
path1 . pointer = b1 - > path ;
path2 . pointer = b2 - > path ;
return ( ( UBool ) ( uhash_compareChars ( name1 , name2 ) &
uhash_compareChars ( path1 , path2 ) ) ) ;
}
2004-10-06 23:10:53 +00:00
/**
 * Closes the data memory held by a profile.
 * The profile struct itself is not freed and its sprepData field is not
 * reset here.
 */
static void
usprep_unload(UStringPrepProfile *data) {
    UDataMemory *mapped = data->sprepData;
    udata_close(mapped);
}
/**
 * Removes profiles from the shared cache and frees them.
 *
 * @param noRefCount TRUE evicts every cached profile regardless of its
 *                   reference count; FALSE evicts only profiles whose
 *                   refCount is 0.
 * @return the number of profiles removed; 0 if the cache was never created
 */
static int32_t
usprep_internal_flushCache(UBool noRefCount) {
    int32_t removed = 0;
    int32_t iterPos = -1;

    umtx_lock(&usprepMutex);
    /* nothing to do if the shared table has not been lazily created yet */
    if (SHARED_DATA_HASHTABLE != NULL) {
        /* walk every element in the table */
        for (const UHashElement *elem = uhash_nextElement(SHARED_DATA_HASHTABLE, &iterPos);
             elem != NULL;
             elem = uhash_nextElement(SHARED_DATA_HASHTABLE, &iterPos)) {
            UStringPrepProfile *cached = (UStringPrepProfile *)elem->value.pointer;
            UStringPrepKey *cachedKey = (UStringPrepKey *)elem->key.pointer;

            const UBool evict =
                (noRefCount == TRUE) ||
                (noRefCount == FALSE && cached->refCount == 0);
            if (!evict) {
                continue;
            }

            ++removed;
            uhash_removeElement(SHARED_DATA_HASHTABLE, elem);

            /* unload the data, then release the key strings and both structs */
            usprep_unload(cached);
            if (cachedKey->name != NULL) {
                uprv_free(cachedKey->name);
                cachedKey->name = NULL;
            }
            if (cachedKey->path != NULL) {
                uprv_free(cachedKey->path);
                cachedKey->path = NULL;
            }
            uprv_free(cached);
            uprv_free(cachedKey);
        }
    }
    umtx_unlock(&usprepMutex);

    return removed;
}
/* Works just like ucnv_flushCache()
static int32_t
usprep_flushCache ( ) {
return usprep_internal_flushCache ( FALSE ) ;
}
*/
/**
 * Library cleanup hook (registered by createCache()).
 * Evicts every cached profile, closes the cache table once it is empty, and
 * resets the init-once guard so the cache can be created again.
 *
 * @return TRUE if the shared table has been released
 */
static UBool U_CALLCONV usprep_cleanup(void) {
    if (SHARED_DATA_HASHTABLE != NULL) {
        /* evict everything, ignoring reference counts */
        usprep_internal_flushCache(TRUE);
    }
    if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) {
        uhash_close(SHARED_DATA_HASHTABLE);
        SHARED_DATA_HASHTABLE = NULL;
    }

    gSharedDataInitOnce.reset();

    const UBool fullyReleased = (SHARED_DATA_HASHTABLE == NULL);
    return fullyReleased;
}
2003-07-25 16:01:32 +00:00
U_CDECL_END
2003-07-24 23:23:19 +00:00
/** Initializes the cache for resources */
2013-06-01 03:37:16 +00:00
static void U_CALLCONV
createCache ( UErrorCode & status ) {
SHARED_DATA_HASHTABLE = uhash_open ( hashEntry , compareEntries , NULL , & status ) ;
if ( U_FAILURE ( status ) ) {
SHARED_DATA_HASHTABLE = NULL ;
}
ucln_common_registerCleanup ( UCLN_COMMON_USPREP , usprep_cleanup ) ;
}
2003-07-24 23:23:19 +00:00
/**
 * Ensures the shared profile cache has been created, running createCache()
 * at most once through the init-once guard.
 *
 * @param status in/out error code; must not be NULL
 */
static void
initCache(UErrorCode *status) {
    UErrorCode &errorRef = *status;
    umtx_initOnce(gSharedDataInitOnce, &createCache, errorRef);
}
/**
 * Loads StringPrep data into a profile.
 *
 * Opens the data item (path, type, name) with isSPrepAcceptable() as the
 * acceptance callback, unserializes the trie that follows the index array,
 * and installs the data memory, indexes, trie and mapping-table pointer in
 * the profile. If normalization is enabled in the profile options, the
 * Unicode version of this library is checked against the versions recorded
 * in the data.
 *
 * On failure no data memory opened by this call is left behind: it is closed
 * here, including when it had already been stored in profile->sprepData (the
 * original code leaked it on the version-mismatch path because the local
 * pointer had been reset to NULL by then).
 *
 * @param profile   profile to fill in
 * @param path      data package path, may be NULL
 * @param name      data item name
 * @param type      data item type
 * @param errorCode in/out error code
 * @return TRUE if the profile's data is loaded, FALSE on any failure
 */
static UBool U_CALLCONV
loadData(UStringPrepProfile* profile,
         const char* path,
         const char* name,
         const char* type,
         UErrorCode* errorCode) {
    /* load Unicode SPREP data from file */
    UTrie _sprepTrie = {0, 0, 0, 0, 0, 0, 0};
    UDataMemory* dataMemory;
    const int32_t* p = NULL;
    const uint8_t* pb;
    UVersionInfo normUnicodeVersion;
    int32_t normUniVer, sprepUniVer, normCorrVer;
    UBool installedHere = FALSE;

    if (errorCode == NULL || U_FAILURE(*errorCode)) {
        return FALSE;
    }

    /* open the data outside the mutex block */
    //TODO: change the path
    dataMemory = udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode);
    if (U_FAILURE(*errorCode)) {
        return FALSE;
    }

    /* the index array comes first, immediately followed by the serialized trie */
    p = (const int32_t*)udata_getMemory(dataMemory);
    pb = (const uint8_t*)(p + _SPREP_INDEX_TOP);
    utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode);
    _sprepTrie.getFoldingOffset = getSPrepFoldingOffset;

    if (U_FAILURE(*errorCode)) {
        udata_close(dataMemory);
        return FALSE;
    }

    /* in the mutex block, set the data for this process */
    umtx_lock(&usprepMutex);
    if (profile->sprepData == NULL) {
        profile->sprepData = dataMemory;
        dataMemory = NULL; /* ownership moved to the profile */
        installedHere = TRUE;
        uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes));
        uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie));
    } else {
        p = (const int32_t*)udata_getMemory(profile->sprepData);
    }
    umtx_unlock(&usprepMutex);

    /* the mapping table follows the trie */
    profile->mappingData = (uint16_t*)((uint8_t*)(p + _SPREP_INDEX_TOP) + profile->indexes[_SPREP_INDEX_TRIE_SIZE]);

    /* pack each version as major.minor.milli.micro into one int32_t for comparison */
    u_getUnicodeVersion(normUnicodeVersion);
    normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
                 (normUnicodeVersion[2] << 8) + (normUnicodeVersion[3]);
    sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
                  (dataVersion[2] << 8) + (dataVersion[3]);
    normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION];

    if (normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Version of the normalization data */
        normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Version of the normalization data */
        ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on */
        ) {
        *errorCode = U_INVALID_FORMAT_ERROR;
        if (installedHere) {
            /* release the data this call stored in the profile; the caller only frees the struct */
            udata_close(profile->sprepData);
            profile->sprepData = NULL;
            profile->mappingData = NULL;
        } else {
            udata_close(dataMemory);
        }
        return FALSE;
    }

    profile->isDataLoaded = TRUE;

    /* if a different thread set it first, then close the extra data */
    if (dataMemory != NULL) {
        udata_close(dataMemory); /* NULL if it was set correctly */
    }

    return profile->isDataLoaded;
}
/**
 * Returns the profile for (path, name), taking it from the shared cache or
 * loading and caching it on a miss. The returned profile's refCount has been
 * incremented on behalf of the caller.
 *
 * If inserting a freshly loaded profile into the cache fails, the profile,
 * its key and its data are released and NULL is returned (previously they
 * were leaked and an uncached profile was returned together with a failure
 * status).
 *
 * @param path   data package path, may be NULL
 * @param name   profile (data item) name
 * @param status in/out error code; must not be NULL
 * @return the profile, or NULL on failure
 */
static UStringPrepProfile*
usprep_getProfile(const char* path,
                  const char* name,
                  UErrorCode *status) {
    UStringPrepProfile* profile = NULL;

    initCache(status);
    if (U_FAILURE(*status)) {
        return NULL;
    }

    UStringPrepKey stackKey;
    /*
     * const is cast away to save malloc, strcpy and free calls;
     * the passed-in pointers are only used for the lookups in the
     * hash table, which is safe
     */
    stackKey.name = (char*)name;
    stackKey.path = (char*)path;

    /* fetch the data from the cache */
    umtx_lock(&usprepMutex);
    profile = (UStringPrepProfile*)(uhash_get(SHARED_DATA_HASHTABLE, &stackKey));
    if (profile != NULL) {
        profile->refCount++;
    }
    umtx_unlock(&usprepMutex);

    if (profile == NULL) {
        /* else load the data and put the data in the cache */
        LocalMemory<UStringPrepProfile> newProfile;
        if (newProfile.allocateInsteadAndReset() == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }

        /* load the data */
        if (!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status)) {
            return NULL;
        }

        /* get the options */
        newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
        newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);

        if (newProfile->checkBiDi) {
            newProfile->bdp = ubidi_getSingleton();
        }

        /* allocate the permanent key before taking the lock */
        LocalMemory<UStringPrepKey> key;
        LocalMemory<char> keyName;
        LocalMemory<char> keyPath;
        if (key.allocateInsteadAndReset() == NULL ||
            keyName.allocateInsteadAndCopy(uprv_strlen(name) + 1) == NULL ||
            (path != NULL &&
             keyPath.allocateInsteadAndCopy(uprv_strlen(path) + 1) == NULL)
            ) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            usprep_unload(newProfile.getAlias());
            return NULL;
        }

        umtx_lock(&usprepMutex);
        // If another thread already inserted the same key/value, refcount and cleanup our thread data
        profile = (UStringPrepProfile*)(uhash_get(SHARED_DATA_HASHTABLE, &stackKey));
        if (profile != NULL) {
            profile->refCount++;
            usprep_unload(newProfile.getAlias());
        } else {
            /* initialize the key members */
            key->name = keyName.orphan();
            uprv_strcpy(key->name, name);
            if (path != NULL) {
                key->path = keyPath.orphan();
                uprv_strcpy(key->path, path);
            }
            UStringPrepKey *cacheKey = key.orphan();
            profile = newProfile.orphan();

            /* add the data object to the cache */
            profile->refCount = 1;
            uhash_put(SHARED_DATA_HASHTABLE, cacheKey, profile, status);
            if (U_FAILURE(*status)) {
                /* the table did not take the entry: release everything we own */
                usprep_unload(profile);
                if (cacheKey->name != NULL) {
                    uprv_free(cacheKey->name);
                }
                if (cacheKey->path != NULL) {
                    uprv_free(cacheKey->path);
                }
                uprv_free(cacheKey);
                uprv_free(profile);
                profile = NULL;
            }
        }
        umtx_unlock(&usprepMutex);
    }

    return profile;
}
/**
 * Opens (or fetches from the cache) the StringPrep profile identified by
 * path and name.
 *
 * @param path   data package path, may be NULL
 * @param name   profile name
 * @param status in/out error code; nothing is done if it is NULL or already
 *               indicates failure
 * @return the profile, or NULL on failure
 */
U_CAPI UStringPrepProfile* U_EXPORT2
usprep_open(const char* path,
            const char* name,
            UErrorCode* status) {
    if (status != NULL && U_SUCCESS(*status)) {
        /* lookup/loading and struct initialization happen in usprep_getProfile */
        return usprep_getProfile(path, name, status);
    }
    return NULL;
}
2009-02-02 16:16:07 +00:00
/**
 * Opens one of the built-in profiles selected by enum value.
 * The value is used as an index into PROFILE_NAMES and the profile is opened
 * with a NULL path.
 *
 * @param type   profile selector
 * @param status in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR if type
 *               is outside the PROFILE_NAMES table
 * @return the profile, or NULL on failure
 */
U_CAPI UStringPrepProfile* U_EXPORT2
usprep_openByType(UStringPrepProfileType type,
                  UErrorCode* status) {
    if (status == NULL || U_FAILURE(*status)) {
        return NULL;
    }

    const int32_t profileCount = (int32_t)(sizeof(PROFILE_NAMES) / sizeof(PROFILE_NAMES[0]));
    const int32_t index = (int32_t)type;
    const UBool inRange = (UBool)(index >= 0 && index < profileCount);
    if (!inRange) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    return usprep_open(NULL, PROFILE_NAMES[index], status);
}
2003-07-24 23:23:19 +00:00
/**
 * Releases one reference to a profile.
 * Only the reference count is decremented (never below zero); the profile
 * stays in the cache and is not freed here.
 *
 * @param profile profile to release; NULL is ignored
 */
U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile* profile) {
    if (profile != NULL) {
        umtx_lock(&usprepMutex);
        /* decrement the ref count */
        if (profile->refCount > 0) {
            profile->refCount--;
        }
        umtx_unlock(&usprepMutex);
    }
}
/**
 * Fills a UParseError with the error offset and the text surrounding it.
 *
 * preContext receives up to U_PARSE_CONTEXT_LEN-1 code units that precede
 * pos; postContext receives up to U_PARSE_CONTEXT_LEN-1 code units starting
 * at pos (so it includes the offending unit). Both are NUL-terminated.
 *
 * The position used for context extraction is clamped to [0, rulesLen].
 * Without that, pos > rulesLen (or pos < 0) produced a negative context
 * length and the terminating NUL was written before the start of the
 * context buffer. parseError->offset still reports pos exactly as passed in.
 *
 * @param rules      text in which the error occurred
 * @param pos        offset of the error in rules
 * @param rulesLen   length of rules in code units
 * @param parseError struct to fill in; nothing is done if NULL
 */
U_CFUNC void
uprv_syntaxError(const UChar* rules,
                 int32_t pos,
                 int32_t rulesLen,
                 UParseError* parseError) {
    if (parseError == NULL) {
        return;
    }
    parseError->offset = pos;
    parseError->line = 0; // we are not using line numbers

    // keep the extraction window inside [0, rulesLen]
    const int32_t textLen = (rulesLen < 0) ? 0 : rulesLen;
    int32_t errPos = pos;
    if (errPos < 0) {
        errPos = 0;
    } else if (errPos > textLen) {
        errPos = textLen;
    }

    // for pre-context
    const int32_t preStart = (errPos < U_PARSE_CONTEXT_LEN) ? 0 : (errPos - (U_PARSE_CONTEXT_LEN - 1));
    const int32_t preLen = errPos - preStart;
    if (preLen > 0) {
        u_memcpy(parseError->preContext, rules + preStart, preLen);
    }
    // null terminate the buffer
    parseError->preContext[preLen] = 0;

    // for post-context; include error rules[pos]
    // computed from the remaining length so that pos + context length cannot overflow
    const int32_t remaining = textLen - errPos;
    const int32_t postLen = (remaining < (U_PARSE_CONTEXT_LEN - 1)) ? remaining : (U_PARSE_CONTEXT_LEN - 1);
    if (postLen > 0) {
        u_memcpy(parseError->postContext, rules + errPos, postLen);
    }
    // null terminate the buffer
    parseError->postContext[postLen] = 0;
}
/**
 * Decodes a 16-bit word read from the StringPrep trie.
 *
 * @param trieWord the trie value for a code point
 * @param value    receives the mapping-table index or the signed delta for
 *                 USPREP_MAP results, 0 otherwise
 * @param isIndex  receives TRUE if value is a mapping-table index, FALSE if
 *                 it is a delta or unused
 * @return the type encoded in trieWord; USPREP_TYPE_LIMIT for the initial
 *         value 0, which callers treat as "copy the code point unchanged"
 */
static inline UStringPrepType
getValues(uint16_t trieWord, int16_t& value, UBool& isIndex) {
    /* outputs for every non-mapping result */
    isIndex = FALSE;
    value = 0;

    if (trieWord == 0) {
        /*
         * Initial value stored in the mapping table:
         * just return USPREP_TYPE_LIMIT so that the source
         * code point is copied to the destination
         */
        return USPREP_TYPE_LIMIT;
    }

    if (trieWord >= _SPREP_TYPE_THRESHOLD) {
        /* the word directly encodes a type */
        return (UStringPrepType)(trieWord - _SPREP_TYPE_THRESHOLD);
    }

    if ((trieWord >> 2) == _SPREP_MAX_INDEX_VALUE) {
        /* reserved payload: the code point is deleted */
        return USPREP_DELETE;
    }

    /* a mapping: bit 1 says whether the payload is an index or a delta */
    if (trieWord & 0x02) {
        isIndex = TRUE;
        value = trieWord >> 2; // drop the lower 2 flag bits
    } else {
        /* reinterpret as signed first so the shift keeps the sign of the delta */
        value = (int16_t)trieWord;
        value = (value >> 2);
    }
    return USPREP_MAP;
}
2014-09-11 15:34:38 +00:00
// TODO: change to writing to UnicodeString not UChar *
2003-07-24 23:23:19 +00:00
static int32_t
usprep_map ( const UStringPrepProfile * profile ,
const UChar * src , int32_t srcLength ,
UChar * dest , int32_t destCapacity ,
int32_t options ,
UParseError * parseError ,
UErrorCode * status ) {
uint16_t result ;
int32_t destIndex = 0 ;
int32_t srcIndex ;
UBool allowUnassigned = ( UBool ) ( ( options & USPREP_ALLOW_UNASSIGNED ) > 0 ) ;
UStringPrepType type ;
int16_t value ;
UBool isIndex ;
2003-12-02 03:03:23 +00:00
const int32_t * indexes = profile - > indexes ;
2003-07-24 23:23:19 +00:00
// no error checking the caller check for error and arguments
// no string length check the caller finds out the string length
for ( srcIndex = 0 ; srcIndex < srcLength ; ) {
UChar32 ch ;
U16_NEXT ( src , srcIndex , srcLength , ch ) ;
result = 0 ;
UTRIE_GET16 ( & profile - > sprepTrie , ch , result ) ;
type = getValues ( result , value , isIndex ) ;
// check if the source codepoint is unassigned
if ( type = = USPREP_UNASSIGNED & & allowUnassigned = = FALSE ) {
uprv_syntaxError ( src , srcIndex - U16_LENGTH ( ch ) , srcLength , parseError ) ;
* status = U_STRINGPREP_UNASSIGNED_ERROR ;
return 0 ;
} else if ( type = = USPREP_MAP ) {
int32_t index , length ;
if ( isIndex ) {
index = value ;
if ( index > = indexes [ _SPREP_ONE_UCHAR_MAPPING_INDEX_START ] & &
index < indexes [ _SPREP_TWO_UCHARS_MAPPING_INDEX_START ] ) {
length = 1 ;
} else if ( index > = indexes [ _SPREP_TWO_UCHARS_MAPPING_INDEX_START ] & &
index < indexes [ _SPREP_THREE_UCHARS_MAPPING_INDEX_START ] ) {
length = 2 ;
} else if ( index > = indexes [ _SPREP_THREE_UCHARS_MAPPING_INDEX_START ] & &
index < indexes [ _SPREP_FOUR_UCHARS_MAPPING_INDEX_START ] ) {
length = 3 ;
} else {
length = profile - > mappingData [ index + + ] ;
}
/* copy mapping to destination */
for ( int32_t i = 0 ; i < length ; i + + ) {
if ( destIndex < destCapacity ) {
dest [ destIndex ] = profile - > mappingData [ index + i ] ;
}
destIndex + + ; /* for pre-flighting */
}
continue ;
} else {
// subtract the delta to arrive at the code point
ch - = value ;
}
} else if ( type = = USPREP_DELETE ) {
// just consume the codepoint and contine
continue ;
}
//copy the code point into destination
if ( ch < = 0xFFFF ) {
if ( destIndex < destCapacity ) {
dest [ destIndex ] = ( UChar ) ch ;
}
destIndex + + ;
} else {
if ( destIndex + 1 < destCapacity ) {
dest [ destIndex ] = U16_LEAD ( ch ) ;
dest [ destIndex + 1 ] = U16_TRAIL ( ch ) ;
}
destIndex + = 2 ;
}
}
return u_terminateUChars ( dest , destCapacity , destIndex , status ) ;
}
2014-09-11 15:34:38 +00:00
/*
2003-07-24 23:23:19 +00:00
1 ) Map - - For each character in the input , check if it has a mapping
and , if so , replace it with its mapping .
2 ) Normalize - - Possibly normalize the result of step 1 using Unicode
normalization .
3 ) Prohibit - - Check for any characters that are not allowed in the
output . If any are found , return an error .
4 ) Check bidi - - Possibly check for right - to - left characters , and if
any are found , make sure that the whole string satisfies the
requirements for bidirectional strings . If the string does not
satisfy the requirements for bidirectional strings , return an
error .
[ Unicode3 .2 ] defines several bidirectional categories ; each character
has one bidirectional category assigned to it . For the purposes of
the requirements below , an " RandALCat character " is a character that
has Unicode bidirectional categories " R " or " AL " ; an " LCat character "
is a character that has Unicode bidirectional category " L " . Note
that there are many characters which fall in neither of the above
definitions; Latin digits (<U+0030> through <U+0039>) are examples of
this because they have bidirectional category " EN " .
In any profile that specifies bidirectional character handling , all
three of the following requirements MUST be met :
1 ) The characters in section 5.8 MUST be prohibited .
2 ) If a string contains any RandALCat character , the string MUST NOT
contain any LCat character .
3 ) If a string contains any RandALCat character , a RandALCat
character MUST be the first character of the string , and a
RandALCat character MUST be the last character of the string .
*/
/**
 * Prepares a string according to a StringPrep profile: map, optionally
 * normalize (NFKC, filtered through the set returned by
 * uniset_getUnicode32Instance()), reject prohibited code points, and
 * optionally check the bidirectional requirements.
 *
 * @param profile      profile to apply; must not be NULL
 * @param src          input text; may be NULL only if srcLength is 0
 * @param srcLength    length of src, or -1 if NUL-terminated
 * @param dest         output buffer; may be NULL only if destCapacity is 0
 * @param destCapacity capacity of dest in UChars
 * @param options      USPREP_ALLOW_UNASSIGNED or 0 (see usprep_map)
 * @param parseError   receives position/context of an offending code point;
 *                     may be NULL
 * @param status       in/out error code; nothing is done if it is NULL or
 *                     already indicates failure
 * @return the length of the prepared text, or 0 on failure
 */
U_CAPI int32_t U_EXPORT2
usprep_prepare(const UStringPrepProfile* profile,
               const UChar* src, int32_t srcLength,
               UChar* dest, int32_t destCapacity,
               int32_t options,
               UParseError* parseError,
               UErrorCode* status) {

    // check error status (NULL-safe, like the other entry points in this file)
    if (status == NULL || U_FAILURE(*status)) {
        return 0;
    }

    // check arguments
    if (profile == NULL ||
        (src == NULL ? srcLength != 0 : srcLength < -1) ||
        (dest == NULL ? destCapacity != 0 : destCapacity < 0)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // get the string length
    if (srcLength < 0) {
        srcLength = u_strlen(src);
    }

    // map
    UnicodeString s1;
    UChar *b1 = s1.getBuffer(srcLength);
    if (b1 == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }
    int32_t b1Len = usprep_map(profile, src, srcLength,
                               b1, s1.getCapacity(), options, parseError, status);
    s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);

    if (*status == U_BUFFER_OVERFLOW_ERROR) {
        // redo processing of string
        /* we do not have enough room so grow the buffer */
        b1 = s1.getBuffer(b1Len);
        if (b1 == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }

        *status = U_ZERO_ERROR; // reset error
        b1Len = usprep_map(profile, src, srcLength,
                           b1, s1.getCapacity(), options, parseError, status);
        s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
    }
    if (U_FAILURE(*status)) {
        return 0;
    }

    // normalize
    UnicodeString s2;
    if (profile->doNFKC) {
        // check each lookup before dereferencing its result
        const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
        if (U_FAILURE(*status)) {
            return 0;
        }
        const UnicodeSet *filterSet = uniset_getUnicode32Instance(*status);
        if (U_FAILURE(*status)) {
            return 0;
        }
        FilteredNormalizer2 fn2(*n2, *filterSet);
        fn2.normalize(s1, s2, *status);
    } else {
        s2.fastCopyFrom(s1);
    }
    if (U_FAILURE(*status)) {
        return 0;
    }

    // Prohibit and checkBiDi in one pass
    const UChar *b2 = s2.getBuffer();
    int32_t b2Len = s2.length();
    UCharDirection direction = U_CHAR_DIRECTION_COUNT, firstCharDir = U_CHAR_DIRECTION_COUNT;
    UBool leftToRight = FALSE, rightToLeft = FALSE;
    int32_t rtlPos = -1, ltrPos = -1;

    for (int32_t b2Index = 0; b2Index < b2Len;) {
        UChar32 ch = 0;
        U16_NEXT(b2, b2Index, b2Len, ch);

        uint16_t result;
        UTRIE_GET16(&profile->sprepTrie, ch, result);

        int16_t value;
        UBool isIndex;
        UStringPrepType type = getValues(result, value, isIndex);

        if (type == USPREP_PROHIBITED ||
            ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says the code point is prohibited */)
            ) {
            *status = U_STRINGPREP_PROHIBITED_ERROR;
            // report against b2: the index and length refer to the normalized
            // text, and b1 is no longer a valid buffer pointer at this point
            uprv_syntaxError(b2, b2Index - U16_LENGTH(ch), b2Len, parseError);
            return 0;
        }

        if (profile->checkBiDi) {
            direction = ubidi_getClass(profile->bdp, ch);
            if (firstCharDir == U_CHAR_DIRECTION_COUNT) {
                firstCharDir = direction;
            }
            if (direction == U_LEFT_TO_RIGHT) {
                leftToRight = TRUE;
                ltrPos = b2Index - 1;
            }
            if (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC) {
                rightToLeft = TRUE;
                rtlPos = b2Index - 1;
            }
        }
    }

    if (profile->checkBiDi == TRUE) {
        // satisfy 2: RandALCat and LCat characters must not be mixed
        if (leftToRight == TRUE && rightToLeft == TRUE) {
            *status = U_STRINGPREP_CHECK_BIDI_ERROR;
            uprv_syntaxError(b2, (rtlPos > ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
            return 0;
        }

        // satisfy 3: with any RandALCat character present, the first and the
        // last character (direction holds the class of the last one) must be RandALCat
        if (rightToLeft == TRUE &&
            !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
              (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
            ) {
            *status = U_STRINGPREP_CHECK_BIDI_ERROR;
            uprv_syntaxError(b2, rtlPos, b2Len, parseError);
            return 0;
        }
    }

    return s2.extract(dest, destCapacity, *status);
}
2003-09-22 22:51:37 +00:00
/* data swapping ------------------------------------------------------------ */
2004-01-25 19:29:22 +00:00
/**
 * Swaps StringPrep (.spp) data between platform formats.
 *
 * The data consists of the standard data header, 16 int32_t indexes, a
 * serialized UTrie and a uint16_t mapping table. With length < 0 only the
 * total size is computed (nothing is written).
 *
 * @param ds         swapper describing input and output formats
 * @param inData     input data, starting with the data header
 * @param length     number of input bytes, or negative for preflighting
 * @param outData    output buffer; may be the same as inData
 * @param pErrorCode in/out error code; U_UNSUPPORTED_ERROR for unrecognized
 *                   data, U_INDEX_OUTOFBOUNDS_ERROR if length is too small
 * @return header size plus data size in bytes, or 0 on failure
 */
U_CAPI int32_t U_EXPORT2
usprep_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode) {
    /* ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow */
    const int32_t kIndexCount = 16;
    const int32_t kIndexesSize = kIndexCount * 4;

    /* udata_swapDataHeader checks the arguments */
    const int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4);
    const UBool recognized = (UBool)(
        pInfo->dataFormat[0] == 0x53 && /* dataFormat="SPRP" */
        pInfo->dataFormat[1] == 0x50 &&
        pInfo->dataFormat[2] == 0x52 &&
        pInfo->dataFormat[3] == 0x50 &&
        pInfo->formatVersion[0] == 3);
    if (!recognized) {
        udata_printError(ds, " usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data \n ",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode = U_UNSUPPORTED_ERROR;
        return 0;
    }

    const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
    uint8_t *outBytes = (uint8_t *)outData + headerSize;
    const int32_t *inIndexes = (const int32_t *)inBytes;

    if (length >= 0) {
        length -= headerSize;
        if (length < kIndexesSize) {
            udata_printError(ds, " usprep_swap(): too few bytes (%d after header) for StringPrep .spp data \n ",
                             length);
            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    /* read the first 16 indexes in the input's byte order */
    int32_t indexes[kIndexCount];
    for (int32_t i = 0; i < kIndexCount; ++i) {
        indexes[i] = udata_readInt32(ds, inIndexes[i]);
    }

    /* calculate the total length of the data */
    const int32_t trieSize = indexes[_SPREP_INDEX_TRIE_SIZE];
    const int32_t mappingSize = indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
    const int32_t size = kIndexesSize /* size of indexes[] */ + trieSize + mappingSize;

    if (length >= 0) {
        if (length < size) {
            udata_printError(ds, " usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data \n ",
                             length);
            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }

        /* copy the data for inaccessible bytes */
        if (inBytes != outBytes) {
            uprv_memcpy(outBytes, inBytes, size);
        }

        int32_t offset = 0;

        /* swap the int32_t indexes[] */
        ds->swapArray32(ds, inBytes, kIndexesSize, outBytes, pErrorCode);
        offset += kIndexesSize;

        /* swap the UTrie */
        utrie_swap(ds, inBytes + offset, trieSize, outBytes + offset, pErrorCode);
        offset += trieSize;

        /* swap the uint16_t mappingTable[] */
        ds->swapArray16(ds, inBytes + offset, mappingSize, outBytes + offset, pErrorCode);
    }

    return headerSize + size;
}
2003-07-24 23:23:19 +00:00
# endif /* #if !UCONFIG_NO_IDNA */