ICU-2466 add IMAP-mailbox-name converter
X-SVN-Rev: 10187
This commit is contained in:
parent
54d2cd87e5
commit
442a78aeec
@ -78,7 +78,7 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
|
||||
#endif
|
||||
|
||||
&_ASCIIData,
|
||||
&_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data
|
||||
&_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData
|
||||
};
|
||||
|
||||
/* Please keep this in binary sorted order for getAlgorithmicTypeFromName.
|
||||
@ -93,6 +93,7 @@ static struct {
|
||||
{ "cesu8", UCNV_CESU8 },
|
||||
#if !UCONFIG_NO_LEGACY_CONVERSION
|
||||
{ "hz",UCNV_HZ },
|
||||
{ "imapmailboxname", UCNV_IMAP_MAILBOX },
|
||||
{ "iscii", UCNV_ISCII },
|
||||
{ "iso2022", UCNV_ISO_2022 },
|
||||
#endif
|
||||
|
@ -158,7 +158,7 @@ extern const UConverterSharedData
|
||||
_LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
|
||||
_LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,
|
||||
_HZData,_ISCIIData, _SCSUData, _ASCIIData,
|
||||
_UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data;
|
||||
_UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData;
|
||||
|
||||
U_CDECL_END
|
||||
|
||||
|
@ -498,7 +498,7 @@ callback:
|
||||
static UChar32
|
||||
_UTF7GetNextUChar(UConverterToUnicodeArgs *pArgs,
|
||||
UErrorCode *pErrorCode) {
|
||||
return ucnv_getNextUCharFromToUImpl(pArgs, _UTF7ToUnicodeWithOffsets, TRUE, pErrorCode);
|
||||
return ucnv_getNextUCharFromToUImpl(pArgs, pArgs->converter->sharedData->impl->toUnicode, TRUE, pErrorCode);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -618,7 +618,7 @@ unicodeMode:
|
||||
if(target<targetLimit) {
|
||||
*target++=MINUS;
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex;
|
||||
*offsets++=sourceIndex-1;
|
||||
}
|
||||
} else {
|
||||
cnv->charErrorBuffer[0]=MINUS;
|
||||
@ -744,8 +744,7 @@ unicodeMode:
|
||||
*offsets++=sourceIndex-1;
|
||||
}
|
||||
} else {
|
||||
cnv->charErrorBuffer[0]=toBase64[bits];
|
||||
cnv->charErrorBufferLength=1;
|
||||
cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits];
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
}
|
||||
@ -814,3 +813,738 @@ const UConverterSharedData _UTF7Data={
|
||||
NULL, NULL, &_UTF7StaticData, FALSE, &_UTF7Impl,
|
||||
0
|
||||
};
|
||||
|
||||
/* IMAP mailbox name encoding ----------------------------------------------- */
|
||||
|
||||
/*
|
||||
* RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1
|
||||
* http://www.ietf.org/rfc/rfc2060.txt
|
||||
*
|
||||
* 5.1.3. Mailbox International Naming Convention
|
||||
*
|
||||
* By convention, international mailbox names are specified using a
|
||||
* modified version of the UTF-7 encoding described in [UTF-7]. The
|
||||
* purpose of these modifications is to correct the following problems
|
||||
* with UTF-7:
|
||||
*
|
||||
* 1) UTF-7 uses the "+" character for shifting; this conflicts with
|
||||
* the common use of "+" in mailbox names, in particular USENET
|
||||
* newsgroup names.
|
||||
*
|
||||
* 2) UTF-7's encoding is BASE64 which uses the "/" character; this
|
||||
* conflicts with the use of "/" as a popular hierarchy delimiter.
|
||||
*
|
||||
* 3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with
|
||||
* the use of "\" as a popular hierarchy delimiter.
|
||||
*
|
||||
* 4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with
|
||||
* the use of "~" in some servers as a home directory indicator.
|
||||
*
|
||||
* 5) UTF-7 permits multiple alternate forms to represent the same
|
||||
 *         string; in particular, printable US-ASCII characters can be
|
||||
* represented in encoded form.
|
||||
*
|
||||
* In modified UTF-7, printable US-ASCII characters except for "&"
|
||||
* represent themselves; that is, characters with octet values 0x20-0x25
|
||||
* and 0x27-0x7e. The character "&" (0x26) is represented by the two-
|
||||
* octet sequence "&-".
|
||||
*
|
||||
* All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all
|
||||
* Unicode 16-bit octets) are represented in modified BASE64, with a
|
||||
* further modification from [UTF-7] that "," is used instead of "/".
|
||||
* Modified BASE64 MUST NOT be used to represent any printing US-ASCII
|
||||
* character which can represent itself.
|
||||
*
|
||||
* "&" is used to shift to modified BASE64 and "-" to shift back to US-
|
||||
* ASCII. All names start in US-ASCII, and MUST end in US-ASCII (that
|
||||
* is, a name that ends with a Unicode 16-bit octet MUST end with a "-
|
||||
* ").
|
||||
*
|
||||
* For example, here is a mailbox name which mixes English, Japanese,
|
||||
* and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw-
|
||||
*/
|
||||
|
||||
/*
|
||||
* Tests for US-ASCII characters belonging to character classes
|
||||
* defined in UTF-7.
|
||||
*
|
||||
* Set D (directly encoded characters) consists of the following
|
||||
* characters: the upper and lower case letters A through Z
|
||||
* and a through z, the 10 digits 0-9, and the following nine special
|
||||
* characters (note that "+" and "=" are omitted):
|
||||
* '(),-./:?
|
||||
*
|
||||
* Set O (optional direct characters) consists of the following
|
||||
* characters (note that "\" and "~" are omitted):
|
||||
* !"#$%&*;<=>@[]^_`{|}
|
||||
*
|
||||
* According to the rules in RFC 2152, the byte values for the following
|
||||
* US-ASCII characters are not used in UTF-7 and are therefore illegal:
|
||||
* - all C0 control codes except for CR LF TAB
|
||||
* - BACKSLASH
|
||||
* - TILDE
|
||||
* - DEL
|
||||
* - all codes beyond US-ASCII, i.e. all >127
|
||||
*/
|
||||
|
||||
/*
 * Constants and character-class helpers for the IMAP mailbox name encoding.
 *
 * IMAP uses '&' (not '+') to start a base64 sequence, and ',' takes the place
 * of '/' as the 64th base64 digit.
 *
 * NOTE: all of the function-like macros below evaluate their argument more
 * than once; do not pass expressions with side effects.
 */
#define AMPERSAND 0x26
#define COMMA 0x2c
#define SLASH 0x2f

/* legal byte values: all US-ASCII graphic characters 0x20..0x7e */
#define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e)

/*
 * direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26
 * (the argument is fully parenthesized so that expression arguments such as
 * a|b are compared against AMPERSAND as a whole)
 */
#define inSetDIMAP(c) (isLegalIMAP(c) && (c)!=AMPERSAND)

/* map a 6-bit value to its IMAP base64 digit: 0..62 via toBase64[], 63 is ',' */
#define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[(n)] : COMMA)

/*
 * map a byte to its IMAP base64 value: ',' is 63, '/' is rejected with -1,
 * everything else is looked up in fromBase64[]
 * (callers in this file only pass bytes <=0x7e)
 */
#define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? -1 : fromBase64[(c)])
|
||||
|
||||
/*
|
||||
* converter status values:
|
||||
*
|
||||
* toUnicodeStatus:
|
||||
* 24 inDirectMode (boolean)
|
||||
* 23..16 base64Counter (-1..7)
|
||||
* 15..0 bits (up to 14 bits incoming base64)
|
||||
*
|
||||
* fromUnicodeStatus:
|
||||
* 24 inDirectMode (boolean)
|
||||
* 23..16 base64Counter (0..2)
|
||||
* 7..0 bits (6 bits outgoing base64)
|
||||
*
|
||||
* ignore bits 31..25
|
||||
*/
|
||||
|
||||
static void
|
||||
_IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
|
||||
UErrorCode *pErrorCode) {
|
||||
UConverter *cnv;
|
||||
const uint8_t *source, *sourceLimit;
|
||||
UChar *target;
|
||||
const UChar *targetLimit;
|
||||
int32_t *offsets;
|
||||
|
||||
uint8_t *bytes;
|
||||
uint8_t byteIndex;
|
||||
|
||||
int32_t length, targetCapacity;
|
||||
|
||||
/* UTF-7 state */
|
||||
uint16_t bits;
|
||||
int8_t base64Counter;
|
||||
UBool inDirectMode;
|
||||
|
||||
int8_t base64Value;
|
||||
|
||||
int32_t sourceIndex, nextSourceIndex;
|
||||
|
||||
UChar c;
|
||||
uint8_t b;
|
||||
|
||||
/* set up the local pointers */
|
||||
cnv=pArgs->converter;
|
||||
|
||||
source=(const uint8_t *)pArgs->source;
|
||||
sourceLimit=(const uint8_t *)pArgs->sourceLimit;
|
||||
target=pArgs->target;
|
||||
targetLimit=pArgs->targetLimit;
|
||||
offsets=pArgs->offsets;
|
||||
/* get the state machine state */
|
||||
{
|
||||
uint32_t status=cnv->toUnicodeStatus;
|
||||
inDirectMode=(UBool)((status>>24)&1);
|
||||
base64Counter=(int8_t)(status>>16);
|
||||
bits=(uint16_t)status;
|
||||
}
|
||||
bytes=cnv->toUBytes;
|
||||
byteIndex=cnv->toULength;
|
||||
|
||||
/* sourceIndex=-1 if the current character began in the previous buffer */
|
||||
sourceIndex=byteIndex==0 ? 0 : -1;
|
||||
nextSourceIndex=0;
|
||||
|
||||
loop:
|
||||
if(inDirectMode) {
|
||||
directMode:
|
||||
/*
|
||||
* In Direct Mode, US-ASCII characters are encoded directly, i.e.,
|
||||
* with their US-ASCII byte values.
|
||||
* An ampersand starts Unicode (or "escape") Mode.
|
||||
*
|
||||
* In Direct Mode, only the sourceIndex is used.
|
||||
*/
|
||||
byteIndex=0;
|
||||
length=sourceLimit-source;
|
||||
targetCapacity=targetLimit-target;
|
||||
if(length>targetCapacity) {
|
||||
length=targetCapacity;
|
||||
}
|
||||
while(length>0) {
|
||||
b=*source++;
|
||||
if(!isLegalIMAP(b)) {
|
||||
/* illegal */
|
||||
bytes[0]=b;
|
||||
byteIndex=1;
|
||||
nextSourceIndex=sourceIndex+1;
|
||||
goto callback;
|
||||
} else if(b!=AMPERSAND) {
|
||||
/* write directly encoded character */
|
||||
*target++=b;
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex++;
|
||||
}
|
||||
} else /* AMPERSAND */ {
|
||||
/* switch to Unicode mode */
|
||||
nextSourceIndex=++sourceIndex;
|
||||
inDirectMode=FALSE;
|
||||
byteIndex=0;
|
||||
bits=0;
|
||||
base64Counter=-1;
|
||||
goto unicodeMode;
|
||||
}
|
||||
--length;
|
||||
}
|
||||
if(source<sourceLimit && target>=targetLimit) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
} else {
|
||||
unicodeMode:
|
||||
/*
|
||||
* In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
|
||||
* The base64 sequence ends with any character that is not in the base64 alphabet.
|
||||
* A terminating minus sign is consumed.
|
||||
* US-ASCII must not be base64-ed.
|
||||
*
|
||||
* In Unicode Mode, the sourceIndex has the index to the start of the current
|
||||
* base64 bytes, while nextSourceIndex is precisely parallel to source,
|
||||
* keeping the index to the following byte.
|
||||
* Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
|
||||
*/
|
||||
while(source<sourceLimit) {
|
||||
if(target<targetLimit) {
|
||||
bytes[byteIndex++]=b=*source++;
|
||||
++nextSourceIndex;
|
||||
if(b>0x7e) {
|
||||
/* illegal - test other illegal US-ASCII values by base64Value==-3 */
|
||||
inDirectMode=TRUE;
|
||||
goto callback;
|
||||
} else if((base64Value=FROM_BASE64_IMAP(b))>=0) {
|
||||
/* collect base64 bytes into UChars */
|
||||
switch(base64Counter) {
|
||||
case -1: /* -1 is immediately after the & */
|
||||
case 0:
|
||||
bits=base64Value;
|
||||
base64Counter=1;
|
||||
break;
|
||||
case 1:
|
||||
case 3:
|
||||
case 4:
|
||||
case 6:
|
||||
bits=(uint16_t)((bits<<6)|base64Value);
|
||||
++base64Counter;
|
||||
break;
|
||||
case 2:
|
||||
c=(UChar)((bits<<4)|(base64Value>>2));
|
||||
if(isLegalIMAP(c)) {
|
||||
/* illegal */
|
||||
inDirectMode=TRUE;
|
||||
goto callback;
|
||||
}
|
||||
*target++=c;
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex;
|
||||
sourceIndex=nextSourceIndex-1;
|
||||
}
|
||||
bytes[0]=b; /* keep this byte in case an error occurs */
|
||||
byteIndex=1;
|
||||
bits=(uint16_t)(base64Value&3);
|
||||
base64Counter=3;
|
||||
break;
|
||||
case 5:
|
||||
c=(UChar)((bits<<2)|(base64Value>>4));
|
||||
if(isLegalIMAP(c)) {
|
||||
/* illegal */
|
||||
inDirectMode=TRUE;
|
||||
goto callback;
|
||||
}
|
||||
*target++=c;
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex;
|
||||
sourceIndex=nextSourceIndex-1;
|
||||
}
|
||||
bytes[0]=b; /* keep this byte in case an error occurs */
|
||||
byteIndex=1;
|
||||
bits=(uint16_t)(base64Value&15);
|
||||
base64Counter=6;
|
||||
break;
|
||||
case 7:
|
||||
c=(UChar)((bits<<6)|base64Value);
|
||||
if(isLegalIMAP(c)) {
|
||||
/* illegal */
|
||||
inDirectMode=TRUE;
|
||||
goto callback;
|
||||
}
|
||||
*target++=c;
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex;
|
||||
sourceIndex=nextSourceIndex;
|
||||
}
|
||||
byteIndex=0;
|
||||
bits=0;
|
||||
base64Counter=0;
|
||||
break;
|
||||
default:
|
||||
/* will never occur */
|
||||
break;
|
||||
}
|
||||
} else if(base64Value==-2) {
|
||||
/* minus sign terminates the base64 sequence */
|
||||
inDirectMode=TRUE;
|
||||
if(base64Counter==-1) {
|
||||
/* &- i.e. a minus immediately following an ampersand */
|
||||
*target++=AMPERSAND;
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex-1;
|
||||
}
|
||||
} else {
|
||||
/* absorb the minus and leave the Unicode Mode */
|
||||
if(bits!=0 || (base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) {
|
||||
/* bits are illegally left over, a UChar is incomplete */
|
||||
/* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */
|
||||
goto callback;
|
||||
}
|
||||
}
|
||||
sourceIndex=nextSourceIndex;
|
||||
goto directMode;
|
||||
} else {
|
||||
if(base64Counter==-1) {
|
||||
/* illegal: & immediately followed by something other than base64 or minus sign */
|
||||
/* include the ampersand in the reported sequence */
|
||||
--sourceIndex;
|
||||
bytes[0]=AMPERSAND;
|
||||
bytes[1]=b;
|
||||
byteIndex=2;
|
||||
}
|
||||
/* base64Value==-1 for characters that are illegal only in Unicode mode */
|
||||
/* base64Value==-3 for illegal characters */
|
||||
/* illegal */
|
||||
inDirectMode=TRUE;
|
||||
goto callback;
|
||||
}
|
||||
} else {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
endloop:
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* reset the state for the next conversion */
|
||||
if(!inDirectMode && U_SUCCESS(*pErrorCode)) {
|
||||
/* a character byte sequence remains incomplete - IMAP must end in ASCII/direct mode */
|
||||
*pErrorCode=U_TRUNCATED_CHAR_FOUND;
|
||||
}
|
||||
cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */
|
||||
cnv->toULength=0;
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
|
||||
cnv->toULength=byteIndex;
|
||||
}
|
||||
|
||||
finish:
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=(const char *)source;
|
||||
pArgs->target=target;
|
||||
pArgs->offsets=offsets;
|
||||
return;
|
||||
|
||||
callback:
|
||||
/* call the callback function with all the preparations and post-processing */
|
||||
/* update the arguments structure */
|
||||
pArgs->source=(const char *)source;
|
||||
pArgs->target=target;
|
||||
pArgs->offsets=offsets;
|
||||
|
||||
/* copy the current bytes to invalidCharBuffer */
|
||||
for(b=0; b<(uint8_t)byteIndex; ++b) {
|
||||
cnv->invalidCharBuffer[b]=(char)bytes[b];
|
||||
}
|
||||
cnv->invalidCharLength=byteIndex;
|
||||
|
||||
/* set the converter state in UConverter to deal with the next character */
|
||||
cnv->toUnicodeStatus=(uint32_t)inDirectMode<<24;
|
||||
cnv->toULength=0;
|
||||
|
||||
/* call the callback function */
|
||||
*pErrorCode=U_ILLEGAL_CHAR_FOUND;
|
||||
cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, cnv->invalidCharLength, UCNV_ILLEGAL, pErrorCode);
|
||||
|
||||
/* get the converter state from UConverter */
|
||||
{
|
||||
uint32_t status=cnv->toUnicodeStatus;
|
||||
inDirectMode=(UBool)((status>>24)&1);
|
||||
base64Counter=(int8_t)(status>>16);
|
||||
bits=(uint16_t)status;
|
||||
}
|
||||
byteIndex=cnv->toULength;
|
||||
|
||||
/* update target and deal with offsets if necessary */
|
||||
offsets=ucnv_updateCallbackOffsets(offsets, pArgs->target-target, sourceIndex);
|
||||
target=pArgs->target;
|
||||
|
||||
/* update the source pointer and index */
|
||||
sourceIndex=nextSourceIndex+((const uint8_t *)pArgs->source-source);
|
||||
source=(const uint8_t *)pArgs->source;
|
||||
|
||||
/*
|
||||
* If the callback overflowed the target, then we need to
|
||||
* stop here with an overflow indication.
|
||||
*/
|
||||
if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
|
||||
goto endloop;
|
||||
} else if(cnv->UCharErrorBufferLength>0) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
goto endloop;
|
||||
} else if(U_FAILURE(*pErrorCode)) {
|
||||
/* break on error */
|
||||
cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */
|
||||
cnv->toULength=0;
|
||||
goto finish;
|
||||
} else {
|
||||
goto loop;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
|
||||
UErrorCode *pErrorCode) {
|
||||
UConverter *cnv;
|
||||
const UChar *source, *sourceLimit;
|
||||
uint8_t *target, *targetLimit;
|
||||
int32_t *offsets;
|
||||
|
||||
int32_t length, targetCapacity, sourceIndex;
|
||||
UChar c;
|
||||
uint8_t b;
|
||||
|
||||
/* UTF-7 state */
|
||||
uint8_t bits;
|
||||
int8_t base64Counter;
|
||||
UBool inDirectMode;
|
||||
|
||||
/* set up the local pointers */
|
||||
cnv=pArgs->converter;
|
||||
|
||||
/* set up the local pointers */
|
||||
source=pArgs->source;
|
||||
sourceLimit=pArgs->sourceLimit;
|
||||
target=(uint8_t *)pArgs->target;
|
||||
targetLimit=(uint8_t *)pArgs->targetLimit;
|
||||
offsets=pArgs->offsets;
|
||||
|
||||
/* get the state machine state */
|
||||
{
|
||||
uint32_t status=cnv->fromUnicodeStatus;
|
||||
inDirectMode=(UBool)((status>>24)&1);
|
||||
base64Counter=(int8_t)(status>>16);
|
||||
bits=(uint8_t)status;
|
||||
}
|
||||
|
||||
/* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
|
||||
sourceIndex=0;
|
||||
|
||||
if(inDirectMode) {
|
||||
directMode:
|
||||
length=sourceLimit-source;
|
||||
targetCapacity=targetLimit-target;
|
||||
if(length>targetCapacity) {
|
||||
length=targetCapacity;
|
||||
}
|
||||
while(length>0) {
|
||||
c=*source++;
|
||||
/* encode 0x20..0x7e except '&' directly */
|
||||
if(inSetDIMAP(c)) {
|
||||
/* encode directly */
|
||||
*target++=(uint8_t)c;
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex++;
|
||||
}
|
||||
} else if(c==AMPERSAND) {
|
||||
/* output &- for & */
|
||||
*target++=AMPERSAND;
|
||||
if(target<targetLimit) {
|
||||
*target++=MINUS;
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex;
|
||||
*offsets++=sourceIndex++;
|
||||
}
|
||||
/* realign length and targetCapacity */
|
||||
goto directMode;
|
||||
} else {
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex++;
|
||||
}
|
||||
cnv->charErrorBuffer[0]=MINUS;
|
||||
cnv->charErrorBufferLength=1;
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* un-read this character and switch to Unicode Mode */
|
||||
--source;
|
||||
*target++=AMPERSAND;
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex;
|
||||
}
|
||||
inDirectMode=FALSE;
|
||||
base64Counter=0;
|
||||
goto unicodeMode;
|
||||
}
|
||||
--length;
|
||||
}
|
||||
if(source<sourceLimit && target>=targetLimit) {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
} else {
|
||||
unicodeMode:
|
||||
while(source<sourceLimit) {
|
||||
if(target<targetLimit) {
|
||||
c=*source++;
|
||||
if(isLegalIMAP(c)) {
|
||||
/* encode directly */
|
||||
inDirectMode=TRUE;
|
||||
|
||||
/* trick: back out this character to make this easier */
|
||||
--source;
|
||||
|
||||
/* terminate the base64 sequence */
|
||||
if(base64Counter!=0) {
|
||||
/* write remaining bits for the previous character */
|
||||
*target++=TO_BASE64_IMAP(bits);
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex-1;
|
||||
}
|
||||
}
|
||||
/* need to terminate with a minus */
|
||||
if(target<targetLimit) {
|
||||
*target++=MINUS;
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex-1;
|
||||
}
|
||||
} else {
|
||||
cnv->charErrorBuffer[0]=MINUS;
|
||||
cnv->charErrorBufferLength=1;
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
goto directMode;
|
||||
} else {
|
||||
/*
|
||||
* base64 this character:
|
||||
* Output 2 or 3 base64 bytes for the remaining bits of the previous character
|
||||
* and the bits of this character, each implicitly in UTF-16BE.
|
||||
*
|
||||
* Here, bits is an 8-bit variable because only 6 bits need to be kept from one
|
||||
* character to the next. The actual 2 or 4 bits are shifted to the left edge
|
||||
* of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
|
||||
*/
|
||||
switch(base64Counter) {
|
||||
case 0:
|
||||
b=(uint8_t)(c>>10);
|
||||
*target++=TO_BASE64_IMAP(b);
|
||||
if(target<targetLimit) {
|
||||
b=(uint8_t)((c>>4)&0x3f);
|
||||
*target++=TO_BASE64_IMAP(b);
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex;
|
||||
*offsets++=sourceIndex++;
|
||||
}
|
||||
} else {
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex++;
|
||||
}
|
||||
b=(uint8_t)((c>>4)&0x3f);
|
||||
cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
|
||||
cnv->charErrorBufferLength=1;
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
bits=(uint8_t)((c&15)<<2);
|
||||
base64Counter=1;
|
||||
break;
|
||||
case 1:
|
||||
b=(uint8_t)(bits|(c>>14));
|
||||
*target++=TO_BASE64_IMAP(b);
|
||||
if(target<targetLimit) {
|
||||
b=(uint8_t)((c>>8)&0x3f);
|
||||
*target++=TO_BASE64_IMAP(b);
|
||||
if(target<targetLimit) {
|
||||
b=(uint8_t)((c>>2)&0x3f);
|
||||
*target++=TO_BASE64_IMAP(b);
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex;
|
||||
*offsets++=sourceIndex;
|
||||
*offsets++=sourceIndex++;
|
||||
}
|
||||
} else {
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex;
|
||||
*offsets++=sourceIndex++;
|
||||
}
|
||||
b=(uint8_t)((c>>2)&0x3f);
|
||||
cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
|
||||
cnv->charErrorBufferLength=1;
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
} else {
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex++;
|
||||
}
|
||||
b=(uint8_t)((c>>8)&0x3f);
|
||||
cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
|
||||
b=(uint8_t)((c>>2)&0x3f);
|
||||
cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
|
||||
cnv->charErrorBufferLength=2;
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
bits=(uint8_t)((c&3)<<4);
|
||||
base64Counter=2;
|
||||
break;
|
||||
case 2:
|
||||
b=(uint8_t)(bits|(c>>12));
|
||||
*target++=TO_BASE64_IMAP(b);
|
||||
if(target<targetLimit) {
|
||||
b=(uint8_t)((c>>6)&0x3f);
|
||||
*target++=TO_BASE64_IMAP(b);
|
||||
if(target<targetLimit) {
|
||||
b=(uint8_t)(c&0x3f);
|
||||
*target++=TO_BASE64_IMAP(b);
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex;
|
||||
*offsets++=sourceIndex;
|
||||
*offsets++=sourceIndex++;
|
||||
}
|
||||
} else {
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex;
|
||||
*offsets++=sourceIndex++;
|
||||
}
|
||||
b=(uint8_t)(c&0x3f);
|
||||
cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
|
||||
cnv->charErrorBufferLength=1;
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
} else {
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex++;
|
||||
}
|
||||
b=(uint8_t)((c>>6)&0x3f);
|
||||
cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
|
||||
b=(uint8_t)(c&0x3f);
|
||||
cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
|
||||
cnv->charErrorBufferLength=2;
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
bits=0;
|
||||
base64Counter=0;
|
||||
break;
|
||||
default:
|
||||
/* will never occur */
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* target is full */
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(pArgs->flush && source>=sourceLimit) {
|
||||
/* flush remaining bits to the target */
|
||||
if(!inDirectMode) {
|
||||
if(base64Counter!=0) {
|
||||
if(target<targetLimit) {
|
||||
*target++=TO_BASE64_IMAP(bits);
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex-1;
|
||||
}
|
||||
} else {
|
||||
cnv->charErrorBuffer[cnv->charErrorBufferLength++]=TO_BASE64_IMAP(bits);
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
}
|
||||
/* need to terminate with a minus */
|
||||
if(target<targetLimit) {
|
||||
*target++=MINUS;
|
||||
if(offsets!=NULL) {
|
||||
*offsets++=sourceIndex-1;
|
||||
}
|
||||
} else {
|
||||
cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
|
||||
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
|
||||
}
|
||||
}
|
||||
/* reset the state for the next conversion */
|
||||
cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
|
||||
} else {
|
||||
/* set the converter state back into UConverter */
|
||||
cnv->fromUnicodeStatus=
|
||||
(cnv->fromUnicodeStatus&0xf0000000)| /* keep version*/
|
||||
((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
|
||||
}
|
||||
|
||||
/* write back the updated pointers */
|
||||
pArgs->source=source;
|
||||
pArgs->target=(char *)target;
|
||||
pArgs->offsets=offsets;
|
||||
return;
|
||||
}
|
||||
|
||||
static const UConverterImpl _IMAPImpl={
|
||||
UCNV_IMAP_MAILBOX,
|
||||
|
||||
NULL,
|
||||
NULL,
|
||||
|
||||
_UTF7Open,
|
||||
NULL,
|
||||
_UTF7Reset,
|
||||
|
||||
_IMAPToUnicodeWithOffsets,
|
||||
_IMAPToUnicodeWithOffsets,
|
||||
_IMAPFromUnicodeWithOffsets,
|
||||
_IMAPFromUnicodeWithOffsets,
|
||||
_UTF7GetNextUChar,
|
||||
|
||||
NULL,
|
||||
NULL,
|
||||
NULL /* we don't need writeSub() because we never call a callback at fromUnicode() */
|
||||
};
|
||||
|
||||
static const UConverterStaticData _IMAPStaticData={
|
||||
sizeof(UConverterStaticData),
|
||||
"IMAP-mailbox-name",
|
||||
0, /* TODO CCSID for UTF-7 */
|
||||
UCNV_IBM, UCNV_IMAP_MAILBOX,
|
||||
1, 4,
|
||||
{ 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
|
||||
FALSE, FALSE,
|
||||
0,
|
||||
0,
|
||||
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
|
||||
};
|
||||
|
||||
const UConverterSharedData _IMAPData={
|
||||
sizeof(UConverterSharedData), ~((uint32_t)0),
|
||||
NULL, NULL, &_IMAPStaticData, FALSE, &_IMAPImpl,
|
||||
0
|
||||
};
|
||||
|
@ -104,6 +104,7 @@ typedef enum {
|
||||
UCNV_UTF16,
|
||||
UCNV_UTF32,
|
||||
UCNV_CESU8,
|
||||
UCNV_IMAP_MAILBOX,
|
||||
|
||||
/* Number of converter types for which we have conversion routines. */
|
||||
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
|
||||
|
@ -148,6 +148,14 @@ UTF32_OppositeEndian
|
||||
# For details about email headers see RFC 2047.
|
||||
UTF-7 { IANA* MIME* } cp65000
|
||||
|
||||
# IMAP-mailbox-name is an ICU-specific name for the encoding of IMAP mailbox names.
|
||||
# It is a substantially modified UTF-7 encoding. See the specification in:
|
||||
#
|
||||
# RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1
|
||||
# (http://www.ietf.org/rfc/rfc2060.txt)
|
||||
# Section 5.1.3. Mailbox International Naming Convention
|
||||
IMAP-mailbox-name
|
||||
|
||||
SCSU { IANA* }
|
||||
BOCU-1 { IANA* } csBOCU-1 { IANA }
|
||||
|
||||
|
@ -1707,6 +1707,26 @@ static void TestSub(int32_t inputsize, int32_t outputsize)
|
||||
}
|
||||
}
|
||||
|
||||
log_verbose("Testing IMAP-mailbox-name toUnicode with substitute callbacks\n");
|
||||
{
|
||||
static const uint8_t bytes[]={
|
||||
/* aDEL a&AB~ a&AB\x0c a&AB- a&AB. a&. */
|
||||
0x61, 0x7f, 0x61, 0x26, 0x41, 0x42, 0x7e, 0x61, 0x26, 0x41, 0x42, 0x0c, 0x61, 0x26, 0x41, 0x42, 0x2d, 0x61, 0x26, 0x41, 0x42, 0x2e, 0x61, 0x26, 0x2e
|
||||
};
|
||||
static const UChar unicode[]={
|
||||
0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd
|
||||
};
|
||||
static const int32_t offsets[]={
|
||||
0, 1, 2, 4, 7, 9, 12, 14, 17, 19, 22, 23
|
||||
};
|
||||
|
||||
if(!testConvertToUnicode(bytes, ARRAY_LENGTH(bytes), unicode, ARRAY_LENGTH(unicode), "IMAP-mailbox-name",
|
||||
UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
|
||||
) {
|
||||
log_err("IMAP-mailbox-name->u with substitute did not match.\n");
|
||||
}
|
||||
}
|
||||
|
||||
log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
|
||||
{
|
||||
static const uint8_t
|
||||
|
@ -1772,11 +1772,16 @@ doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) {
|
||||
|
||||
static void
|
||||
TestTruncated() {
|
||||
struct {
|
||||
static const struct {
|
||||
const char *cnvName;
|
||||
uint8_t bytes[8]; /* partial input bytes resulting in no output */
|
||||
int32_t length;
|
||||
} testCases[]={
|
||||
{ "IMAP-mailbox-name", { 0x26 }, 1 }, /* & */
|
||||
{ "IMAP-mailbox-name", { 0x26, 0x42 }, 2 }, /* &B */
|
||||
{ "IMAP-mailbox-name", { 0x26, 0x42, 0x42 }, 3 }, /* &BB */
|
||||
{ "IMAP-mailbox-name", { 0x26, 0x41, 0x41 }, 3 }, /* &AA */
|
||||
|
||||
{ "UTF-7", { 0x2b, 0x42 }, 2 }, /* +B */
|
||||
{ "UTF-8", { 0xd1 }, 1 },
|
||||
|
||||
|
@ -34,6 +34,7 @@ static void TestConverterTypesAndStarters(void);
|
||||
static void TestAmbiguous(void);
|
||||
static void TestSignatureDetection(void);
|
||||
static void TestUTF7(void);
|
||||
static void TestIMAP(void);
|
||||
static void TestUTF8(void);
|
||||
static void TestCESU8(void);
|
||||
static void TestUTF16(void);
|
||||
@ -145,7 +146,9 @@ TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint
|
||||
while(s<limit) {
|
||||
s0=s;
|
||||
c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
|
||||
break; /* no more significant input */
|
||||
} else if(U_FAILURE(errorCode)) {
|
||||
log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
|
||||
break;
|
||||
} else if((uint32_t)(s-s0)!=*r || c!=*(r+1)) {
|
||||
@ -210,6 +213,7 @@ void addTestNewConvert(TestNode** root)
|
||||
addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
|
||||
addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
|
||||
addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
|
||||
addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
|
||||
addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
|
||||
addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
|
||||
addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
|
||||
@ -404,7 +408,7 @@ static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,
|
||||
log_err("\n");
|
||||
log_err("Got : ");
|
||||
for(p=junkout;p<targ;p++) {
|
||||
log_err("%d, ", junokout[p-junkout]);
|
||||
log_err("%d,", junokout[p-junkout]);
|
||||
}
|
||||
log_err("\n");
|
||||
log_err("Expected: ");
|
||||
@ -880,7 +884,7 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
|
||||
27, 29, 32
|
||||
};
|
||||
static const int32_t fromUnicodeOffsets[] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 9, 9, 10,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
|
||||
11, 12, 12, 12, 13, 13, 13, 13, 14,
|
||||
15, 15,
|
||||
16, 16, 16, 17, 17, 17, 18, 18, 18
|
||||
@ -906,19 +910,95 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
|
||||
31, 33, 36
|
||||
};
|
||||
static const int32_t fromUnicodeOffsetsR[] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 9, 9, 10, 10, 10, 10, 11,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
|
||||
11, 12, 12, 12, 13, 13, 13, 13, 14,
|
||||
15, 15,
|
||||
16, 16, 16, 17, 17, 17, 18, 18, 18
|
||||
};
|
||||
|
||||
testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
|
||||
testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
|
||||
|
||||
testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
|
||||
testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
|
||||
|
||||
testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
|
||||
testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
|
||||
|
||||
testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
|
||||
testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
|
||||
* modified according to RFC 2060,
|
||||
* and supplemented with the one example in RFC 2060 itself.
|
||||
*/
|
||||
{
|
||||
static const uint8_t imap[] = {
|
||||
/* Hi Mom -&Jjo--!
|
||||
A&ImIDkQ-.
|
||||
&-
|
||||
&ZeVnLIqe-
|
||||
\
|
||||
~peter
|
||||
/mail
|
||||
/&ZeVnLIqe-
|
||||
/&U,BTFw-
|
||||
*/
|
||||
0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
|
||||
0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
|
||||
0x26, 0x2d,
|
||||
0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
|
||||
0x5c,
|
||||
0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
|
||||
0x2f, 0x6d, 0x61, 0x69, 0x6c,
|
||||
0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
|
||||
0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
|
||||
};
|
||||
static const UChar unicode[] = {
|
||||
/* Hi Mom -<WHITE SMILING FACE>-!
|
||||
A<NOT IDENTICAL TO><ALPHA>.
|
||||
&
|
||||
[Japanese word "nihongo"]
|
||||
\
|
||||
~peter
|
||||
/mail
|
||||
/<65e5, 672c, 8a9e>
|
||||
/<53f0, 5317>
|
||||
*/
|
||||
0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
|
||||
0x41, 0x2262, 0x0391, 0x2e,
|
||||
0x26,
|
||||
0x65e5, 0x672c, 0x8a9e,
|
||||
0x5c,
|
||||
0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
|
||||
0x2f, 0x6d, 0x61, 0x69, 0x6c,
|
||||
0x2f, 0x65e5, 0x672c, 0x8a9e,
|
||||
0x2f, 0x53f0, 0x5317
|
||||
};
|
||||
static const int32_t toUnicodeOffsets[] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
|
||||
15, 17, 19, 24,
|
||||
25,
|
||||
28, 30, 33,
|
||||
37,
|
||||
38, 39, 40, 41, 42, 43,
|
||||
44, 45, 46, 47, 48,
|
||||
49, 51, 53, 56,
|
||||
60, 62, 64
|
||||
};
|
||||
static const int32_t fromUnicodeOffsets[] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
|
||||
11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
|
||||
15, 15,
|
||||
16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
|
||||
19,
|
||||
20, 21, 22, 23, 24, 25,
|
||||
26, 27, 28, 29, 30,
|
||||
31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
|
||||
35, 36, 36, 36, 37, 37, 37, 37, 37
|
||||
};
|
||||
|
||||
testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
|
||||
|
||||
testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
|
||||
}
|
||||
|
||||
/* Test UTF-8 bad data handling*/
|
||||
@ -1224,6 +1304,7 @@ static void TestConverterTypesAndStarters()
|
||||
TestConverterType("x-iscii-de", UCNV_ISCII);
|
||||
TestConverterType("ascii", UCNV_US_ASCII);
|
||||
TestConverterType("utf-7", UCNV_UTF7);
|
||||
TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
|
||||
TestConverterType("bocu-1", UCNV_BOCU1);
|
||||
}
|
||||
|
||||
@ -1595,6 +1676,50 @@ static TestUTF7() {
|
||||
ucnv_close(cnv);
|
||||
}
|
||||
|
||||
/*
 * Test the "IMAP-mailbox-name" converter one code point at a time.
 *
 * Opens the converter, passes a fixed byte sequence together with the
 * expected (bytes read, code point) pairs to TestNextUChar(), checks through
 * TestNextUCharError() that an empty source range is reported as
 * U_INDEX_OUTOFBOUNDS_ERROR, and verifies the name returned by ucnv_getName().
 * If the converter cannot be opened, an error is logged and the test returns
 * early without running any of the checks.
 */
void
|
||||
static TestIMAP() {
|
||||
/* test input: bytes handed to the IMAP-mailbox-name converter */
|
||||
static const uint8_t in[]={
|
||||
/* H - &Jjo- - ! &- &2AHcAQ- */
|
||||
0x48,
|
||||
0x2d,
|
||||
0x26, 0x4a, 0x6a, 0x6f,
|
||||
0x2d, 0x2d,
|
||||
0x21,
|
||||
0x26, 0x2d,
|
||||
0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
|
||||
};
|
||||
|
||||
/* expected test results, one row per code point, in input order */
|
||||
static const uint32_t results[]={
|
||||
/* pairs of: number of bytes read, resulting code point */
|
||||
1, 0x48,
|
||||
1, 0x2d,
|
||||
4, 0x263a, /* <WHITE SMILING FACE> */
|
||||
2, 0x2d,
|
||||
1, 0x21,
|
||||
2, 0x26,
|
||||
7, 0x10401 /* NOTE(review): 7 bytes are expected here although the last sequence in in[] is 8 bytes long (trailing 0x2d) */
|
||||
};
|
||||
|
||||
const char *cnvName;
|
||||
const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
|
||||
UErrorCode errorCode=U_ZERO_ERROR;
|
||||
UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* shouldn't be a data error */
|
||||
return;
|
||||
}
|
||||
TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
|
||||
/* Test the condition when source >= sourceLimit: source is passed as its own limit and U_INDEX_OUTOFBOUNDS_ERROR is expected */
|
||||
TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
|
||||
cnvName = ucnv_getName(cnv, &errorCode); /* the reported name must compare equal to "IMAP-mailbox-name" */
|
||||
if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
|
||||
log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
|
||||
}
|
||||
ucnv_close(cnv);
|
||||
}
|
||||
|
||||
void
|
||||
static TestUTF8() {
|
||||
/* test input */
|
||||
@ -2596,7 +2721,9 @@ TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
|
||||
while(s<limit) {
|
||||
s0=s;
|
||||
c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
|
||||
if(U_FAILURE(errorCode)) {
|
||||
if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
|
||||
break; /* no more significant input */
|
||||
} else if(U_FAILURE(errorCode)) {
|
||||
log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
|
||||
break;
|
||||
} else {
|
||||
@ -3196,6 +3323,8 @@ TestRoundTrippingAllUTF(void){
|
||||
TestFullRoundtrip("UTF-7");
|
||||
log_verbose("Running exhaustive round trip test for UTF-7\n");
|
||||
TestFullRoundtrip("UTF-7,version=1");
|
||||
log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
|
||||
TestFullRoundtrip("IMAP-mailbox-name");
|
||||
log_verbose("Running exhaustive round trip test for GB18030\n");
|
||||
TestFullRoundtrip("GB18030");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user