2003-02-28 21:37:55 +00:00
|
|
|
/*
 *******************************************************************************
 *
 *   Copyright (C) 2002, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  nameprep.h
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2003feb1
 *   created by: Ram Viswanadha
 */
|
|
|
|
|
|
|
|
#include "nptrans.h"
|
|
|
|
#include "unicode/resbund.h"
|
|
|
|
#include "unicode/uniset.h"
|
|
|
|
#include "sprpimpl.h"
|
|
|
|
#include "cmemory.h"
|
|
|
|
#include "ustr_imp.h"
|
|
|
|
#include "intltest.h"
|
|
|
|
|
|
|
|
#ifdef DEBUG
|
|
|
|
#include <stdio.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// Out-of-class definition of the static member fgClassID, initialised to 0.
// NOTE(review): presumably only the address of this member is significant
// (class-ID style run-time type identification) - confirm against the class
// declaration in nptrans.h.
const char NamePrepTransform::fgClassID=0;
|
|
|
|
|
|
|
|
//Factory method
|
|
|
|
NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){
|
2003-04-23 00:30:02 +00:00
|
|
|
NamePrepTransform* transform = new NamePrepTransform(parseError, status);
|
|
|
|
if(U_FAILURE(status)){
|
|
|
|
delete transform;
|
|
|
|
return NULL;
|
2003-02-28 21:37:55 +00:00
|
|
|
}
|
|
|
|
return transform;
|
|
|
|
}
|
|
|
|
|
|
|
|
//constructor
|
|
|
|
NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status)
|
|
|
|
: unassigned(), prohibited(), labelSeparatorSet(){
|
|
|
|
|
|
|
|
mapping = NULL;
|
|
|
|
bundle = NULL;
|
|
|
|
|
|
|
|
|
|
|
|
const char* testDataName = IntlTest::loadTestData(status);
|
|
|
|
|
|
|
|
if(U_FAILURE(status)){
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
bundle = ures_openDirect(testDataName,"idna_rules",&status);
|
|
|
|
|
|
|
|
if(bundle != NULL && U_SUCCESS(status)){
|
|
|
|
// create the mapping transliterator
|
|
|
|
int32_t ruleLen = 0;
|
|
|
|
const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen, &status);
|
|
|
|
UnicodeString rule(ruleUChar, ruleLen);
|
|
|
|
|
2003-04-22 15:49:53 +00:00
|
|
|
mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule,
|
2003-02-28 21:37:55 +00:00
|
|
|
UTRANS_FORWARD, parseError,status);
|
|
|
|
|
|
|
|
//create the unassigned set
|
|
|
|
int32_t patternLen =0;
|
|
|
|
const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patternLen, &status);
|
|
|
|
unassigned.applyPattern(UnicodeString(pattern, patternLen), status);
|
|
|
|
|
|
|
|
//create prohibited set
|
|
|
|
patternLen=0;
|
|
|
|
pattern = ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &status);
|
|
|
|
UnicodeString test(pattern,patternLen);
|
|
|
|
prohibited.applyPattern(test,status);
|
|
|
|
#ifdef DEBUG
|
|
|
|
if(U_FAILURE(status)){
|
|
|
|
printf("Construction of Unicode set failed\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
if(U_SUCCESS(status)){
|
|
|
|
if(prohibited.contains((UChar) 0x644)){
|
|
|
|
printf("The string contains 0x644 ... damn !!\n");
|
|
|
|
}
|
|
|
|
UnicodeString temp;
|
|
|
|
prohibited.toPattern(temp,TRUE);
|
|
|
|
|
|
|
|
for(int32_t i=0;i<temp.length();i++){
|
|
|
|
printf("%c", (char)temp.charAt(i));
|
|
|
|
}
|
|
|
|
printf("\n");
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
//create label separator set
|
|
|
|
patternLen=0;
|
|
|
|
pattern = ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &status);
|
|
|
|
labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status);
|
|
|
|
}
|
|
|
|
|
|
|
|
if(U_SUCCESS(status) &&
|
|
|
|
(mapping == NULL)
|
|
|
|
){
|
|
|
|
status = U_MEMORY_ALLOCATION_ERROR;
|
|
|
|
delete mapping;
|
|
|
|
ures_close(bundle);
|
|
|
|
mapping = NULL;
|
|
|
|
bundle = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Tells whether a code point is prohibited.
 *
 * Applies the same test process() uses while scanning mapped text: the code
 * point must be a member of the prohibited set, and ASCII space is always
 * exempt.
 *
 * @param ch  code point to test.
 * @return TRUE when ch is in the prohibited set and is not ASCII_SPACE,
 *         FALSE otherwise.
 */
UBool NamePrepTransform::isProhibited(UChar32 ch){
    return (UBool)(ch != ASCII_SPACE && prohibited.contains(ch));
}
|
|
|
|
|
|
|
|
/**
 * Destructor: deletes the mapping transliterator and closes the resource
 * bundle, then clears both pointers.
 */
NamePrepTransform::~NamePrepTransform(){
    // transliterator first, bundle second
    delete mapping;
    ures_close(bundle);

    // leave no stale pointers behind
    mapping = NULL;
    bundle  = NULL;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Runs the mapping transliterator over src and copies the result to dest.
 *
 * @param src              input text; must not be NULL.
 * @param srcLength        number of UChars in src; values below -1 are rejected.
 * @param dest             output buffer; may be NULL only when destCapacity is 0.
 * @param destCapacity     capacity of dest in UChars.
 * @param allowUnassigned  when FALSE, any mapped code point contained in the
 *                         unassigned set fails the call with
 *                         U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR.
 * @param parseError       not used by this function.
 * @param status           in/out ICU error code. U_ILLEGAL_ARGUMENT_ERROR for
 *                         bad arguments; otherwise finalised by
 *                         u_terminateUChars().
 * @return 0 on an incoming failure, bad arguments or an unassigned code point;
 *         otherwise the length of the mapped text, which is returned even when
 *         it did not fit into dest.
 */
int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength,
                               UChar* dest, int32_t destCapacity,
                               UBool allowUnassigned,
                               UParseError* parseError,
                               UErrorCode& status ){

    if(U_FAILURE(status)){
        return 0;
    }
    //check arguments
    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    UnicodeString rsource(src,srcLength);
    // map the code points
    // transliteration also performs NFKC
    mapping->transliterate(rsource);

    // Read-only view of the mapped text. This is the const getBuffer(), so no
    // matching releaseBuffer() call is needed.
    const UChar* buffer = rsource.getBuffer();
    const int32_t bufLen = rsource.length();

    // check if unassigned
    if(!allowUnassigned){
        int32_t bufIndex = 0;
        while(bufIndex < bufLen){
            UChar32 ch = 0;
            U16_NEXT(buffer, bufIndex, bufLen, ch);
            if(unassigned.contains(ch)){
                status = U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR;
                return 0;
            }
        }
    }

    // Copy whenever the text fits, including the case where it fills dest
    // exactly: u_terminateUChars() then reports only a warning, so the caller
    // must find the text in dest. Skipping the empty copy keeps a NULL dest
    // (legal with destCapacity==0) away from memcpy.
    if(bufLen > 0 && bufLen <= destCapacity){
        uprv_memcpy(dest,buffer,bufLen*U_SIZEOF_UCHAR);
    }

    return u_terminateUChars(dest, destCapacity, bufLen, &status);
}
|
|
|
|
|
|
|
|
|
|
|
|
// Capacity, in UChars, of the stack buffer that process() tries first before
// falling back to a heap-allocated buffer.
#define MAX_BUFFER_SIZE 300
|
|
|
|
|
|
|
|
/**
 * Prepares src: maps it with map(), rejects prohibited code points, enforces
 * the bidirectional-text requirements and copies the result to dest.
 *
 * Steps:
 *   1. map() into a stack buffer, retrying with a heap buffer on overflow;
 *   2. fail with U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR on any code point in
 *      the prohibited set other than U+0020;
 *   3. fail with U_IDNA_CHECK_BIDI_ERROR when the text mixes L characters with
 *      R/AL characters, or contains an R/AL character without both its first
 *      and its last character being R/AL (RFC 3454, section 6);
 *   4. copy to dest and let u_terminateUChars() finalise status.
 *
 * @param src              input text; must not be NULL.
 * @param srcLength        number of UChars in src; values below -1 are rejected.
 * @param dest             output buffer; may be NULL only when destCapacity is 0.
 * @param destCapacity     capacity of dest in UChars.
 * @param allowUnassigned  forwarded to map().
 * @param parseError       forwarded to map().
 * @param status           in/out ICU error code.
 * @return 0 on an incoming failure or bad arguments; otherwise the value of
 *         u_terminateUChars() for the mapped length.
 */
int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
                                    UChar* dest, int32_t destCapacity,
                                    UBool allowUnassigned,
                                    UParseError* parseError,
                                    UErrorCode& status ){
    // check error status
    if(U_FAILURE(status)){
        return 0;
    }

    //check arguments
    if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
        status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // All locals are declared before the first goto so that no jump to
    // CLEANUP crosses an initialisation.
    UChar b1Stack[MAX_BUFFER_SIZE];
    UChar *b1 = b1Stack;
    int32_t b1Len, b1Capacity = MAX_BUFFER_SIZE;

    int32_t b1Index = 0;
    UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
    UBool leftToRight=FALSE, rightToLeft=FALSE;

    b1Len = map(src,srcLength, b1, b1Capacity,allowUnassigned,parseError, status);

    if(status == U_BUFFER_OVERFLOW_ERROR){
        // redo processing of string
        /* we do not have enough room so grow the buffer; ask for one spare
           unit so the retry output fits together with its terminator */
        if(!u_growBufferFromStatic(b1Stack,&b1,&b1Capacity,b1Len+1,0)){
            status = U_MEMORY_ALLOCATION_ERROR;
            goto CLEANUP;
        }

        status = U_ZERO_ERROR; // reset error

        b1Len = map(src,srcLength, b1, b1Capacity,allowUnassigned, parseError, status);
    }

    if(U_FAILURE(status)){
        goto CLEANUP;
    }

    for(; b1Index<b1Len; ){

        UChar32 ch = 0;

        U16_NEXT(b1, b1Index, b1Len, ch);

        if(prohibited.contains(ch) && ch!=0x0020){
            status = U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR;
            goto CLEANUP;
        }

        // After the loop, direction holds the class of the last character.
        direction = u_charDirection(ch);
        if(firstCharDir==U_CHAR_DIRECTION_COUNT){
            firstCharDir = direction;
        }
        if(direction == U_LEFT_TO_RIGHT){
            leftToRight = TRUE;
        }
        if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
            rightToLeft = TRUE;
        }
    }

    // satisfy 2: no mixing of L with R/AL characters
    if( leftToRight == TRUE && rightToLeft == TRUE){
        status = U_IDNA_CHECK_BIDI_ERROR;
        goto CLEANUP;
    }

    // satisfy 3: when an R/AL character is present, the first and the last
    // character must each be R or AL (they need not be the same class)
    if( rightToLeft == TRUE &&
        !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
          (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
      ){
        status = U_IDNA_CHECK_BIDI_ERROR;
        // leave through CLEANUP so a heap-grown b1 is released
        goto CLEANUP;
    }

    // Skipping the empty copy keeps a NULL dest (legal with destCapacity==0)
    // away from memmove.
    if(b1Len > 0 && b1Len <= destCapacity){
        uprv_memmove(dest,b1, b1Len*U_SIZEOF_UCHAR);
    }

CLEANUP:
    if(b1!=b1Stack){
        uprv_free(b1);
    }

    return u_terminateUChars(dest, destCapacity, b1Len, &status);
}
|
|
|
|
|
|
|
|
/**
 * Tells whether ch belongs to the label separator set.
 *
 * @param ch      code point to test.
 * @param status  ICU error code; when it already reports a failure the set is
 *                not consulted. Never modified here.
 * @return FALSE on an incoming failure, otherwise the result of
 *         labelSeparatorSet.contains(ch).
 */
UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){
    // a pending error short-circuits the lookup
    return U_SUCCESS(status) ? labelSeparatorSet.contains(ch) : (UBool)FALSE;
}
|
|
|
|
|
|
|
|
|