ICU-2466 add IMAP-mailbox-name converter

X-SVN-Rev: 10187
2002-11-07 21:02:24 +00:00 · 2002-11-07 21:02:24 +00:00 · 442a78aeec
commit 442a78aeec
parent 54d2cd87e5
8 changed files with 914 additions and 16 deletions
--- a/icu4c/source/common/ucnv_bld.c
+++ b/icu4c/source/common/ucnv_bld.c
@ -78,7 +78,7 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
 #endif

    &_ASCIIData,
-    &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data
+    &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData
 };

 /* Please keep this in binary sorted order for getAlgorithmicTypeFromName.
@ -93,6 +93,7 @@ static struct {
  { "cesu8", UCNV_CESU8 },
 #if !UCONFIG_NO_LEGACY_CONVERSION
  { "hz",UCNV_HZ },
+  { "imapmailboxname", UCNV_IMAP_MAILBOX },
  { "iscii", UCNV_ISCII },
  { "iso2022", UCNV_ISO_2022 },
 #endif
--- a/icu4c/source/common/ucnv_cnv.h
+++ b/icu4c/source/common/ucnv_cnv.h
@ -158,7 +158,7 @@ extern const UConverterSharedData
    _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
    _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,
    _HZData,_ISCIIData, _SCSUData, _ASCIIData,
-    _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data;
+    _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData;

 U_CDECL_END

--- a/icu4c/source/common/ucnv_u7.c
+++ b/icu4c/source/common/ucnv_u7.c
@ -498,7 +498,7 @@ callback:
 static UChar32
 _UTF7GetNextUChar(UConverterToUnicodeArgs *pArgs,
                  UErrorCode *pErrorCode) {
-    return ucnv_getNextUCharFromToUImpl(pArgs, _UTF7ToUnicodeWithOffsets, TRUE, pErrorCode);
+    return ucnv_getNextUCharFromToUImpl(pArgs, pArgs->converter->sharedData->impl->toUnicode, TRUE, pErrorCode);
 }

 static void
@ -618,7 +618,7 @@ unicodeMode:
                        if(target<targetLimit) {
                            *target++=MINUS;
                            if(offsets!=NULL) {
-                                *offsets++=sourceIndex;
+                                *offsets++=sourceIndex-1;
                            }
                        } else {
                            cnv->charErrorBuffer[0]=MINUS;
@ -744,8 +744,7 @@ unicodeMode:
                    *offsets++=sourceIndex-1;
                }
            } else {
-                cnv->charErrorBuffer[0]=toBase64[bits];
-                cnv->charErrorBufferLength=1;
+                cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits];
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            }
        }
@ -814,3 +813,738 @@ const UConverterSharedData _UTF7Data={
    NULL, NULL, &_UTF7StaticData, FALSE, &_UTF7Impl,
    0
 };
+
+/* IMAP mailbox name encoding ----------------------------------------------- */
+
+/*
+ * RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1
+ * http://www.ietf.org/rfc/rfc2060.txt
+ *
+ * 5.1.3.  Mailbox International Naming Convention
+ *
+ * By convention, international mailbox names are specified using a
+ * modified version of the UTF-7 encoding described in [UTF-7].  The
+ * purpose of these modifications is to correct the following problems
+ * with UTF-7:
+ *
+ *    1) UTF-7 uses the "+" character for shifting; this conflicts with
+ *       the common use of "+" in mailbox names, in particular USENET
+ *       newsgroup names.
+ *
+ *    2) UTF-7's encoding is BASE64 which uses the "/" character; this
+ *       conflicts with the use of "/" as a popular hierarchy delimiter.
+ *
+ *    3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with
+ *       the use of "\" as a popular hierarchy delimiter.
+ *
+ *    4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with
+ *       the use of "~" in some servers as a home directory indicator.
+ *
+ *    5) UTF-7 permits multiple alternate forms to represent the same
+ *       string; in particular, printable US-ASCII characters can be
+ *       represented in encoded form.
+ *
+ * In modified UTF-7, printable US-ASCII characters except for "&"
+ * represent themselves; that is, characters with octet values 0x20-0x25
+ * and 0x27-0x7e.  The character "&" (0x26) is represented by the two-
+ * octet sequence "&-".
+ *
+ * All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all
+ * Unicode 16-bit octets) are represented in modified BASE64, with a
+ * further modification from [UTF-7] that "," is used instead of "/".
+ * Modified BASE64 MUST NOT be used to represent any printing US-ASCII
+ * character which can represent itself.
+ *
+ * "&" is used to shift to modified BASE64 and "-" to shift back to US-
+ * ASCII.  All names start in US-ASCII, and MUST end in US-ASCII (that
+ * is, a name that ends with a Unicode 16-bit octet MUST end with a
+ * "-").
+ *
+ * For example, here is a mailbox name which mixes English, Japanese,
+ * and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw-
+ */
+
+/*
+ * For reference, these are the US-ASCII character classes defined in UTF-7.
+ * IMAP differs: only 0x20..0x7e are legal bytes, and all but '&' are direct.
+ *
+ * Set D (directly encoded characters) consists of the following
+ * characters: the upper and lower case letters A through Z
+ * and a through z, the 10 digits 0-9, and the following nine special
+ * characters (note that "+" and "=" are omitted):
+ *     '(),-./:?
+ *
+ * Set O (optional direct characters) consists of the following
+ * characters (note that "\" and "~" are omitted):
+ *     !"#$%&*;<=>@[]^_`{|}
+ *
+ * According to the rules in RFC 2152, the byte values for the following
+ * US-ASCII characters are not used in UTF-7 and are therefore illegal:
+ * - all C0 control codes except for CR LF TAB
+ * - BACKSLASH
+ * - TILDE
+ * - DEL
+ * - all codes beyond US-ASCII, i.e. all >127
+ */
+
+/* uses '&' not '+' to start a base64 sequence */
+#define AMPERSAND 0x26
+#define COMMA 0x2c
+#define SLASH 0x2f
+
+/* legal byte values: all printable US-ASCII characters 0x20..0x7e */
+#define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e)
+
+/* direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26 */
+#define inSetDIMAP(c) (isLegalIMAP(c) && c!=AMPERSAND)
+
+#define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[n] : COMMA)
+#define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? -1 : fromBase64[c])
+
+/*
+ * converter status values:
+ *
+ * toUnicodeStatus:
+ *     24 inDirectMode (boolean)
+ * 23..16 base64Counter (-1..7)
+ * 15..0  bits (up to 14 bits incoming base64)
+ *
+ * fromUnicodeStatus:
+ *     24 inDirectMode (boolean)
+ * 23..16 base64Counter (0..2)
+ *  7..0  bits (6 bits outgoing base64)
+ *
+ * ignore bits 31..25
+ */
+
+static void
+_IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+                          UErrorCode *pErrorCode) {
+    UConverter *cnv;
+    const uint8_t *source, *sourceLimit;
+    UChar *target;
+    const UChar *targetLimit;
+    int32_t *offsets;
+
+    uint8_t *bytes;
+    uint8_t byteIndex;
+
+    int32_t length, targetCapacity;
+
+    /* UTF-7 state */
+    uint16_t bits;
+    int8_t base64Counter;
+    UBool inDirectMode;
+
+    int8_t base64Value;
+
+    int32_t sourceIndex, nextSourceIndex;
+
+    UChar c;
+    uint8_t b;
+
+    /* set up the local pointers */
+    cnv=pArgs->converter;
+
+    source=(const uint8_t *)pArgs->source;
+    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+    target=pArgs->target;
+    targetLimit=pArgs->targetLimit;
+    offsets=pArgs->offsets;
+    /* get the state machine state */
+    {
+        uint32_t status=cnv->toUnicodeStatus;
+        inDirectMode=(UBool)((status>>24)&1);
+        base64Counter=(int8_t)(status>>16);
+        bits=(uint16_t)status;
+    }
+    bytes=cnv->toUBytes;
+    byteIndex=cnv->toULength;
+
+    /* sourceIndex=-1 if the current character began in the previous buffer */
+    sourceIndex=byteIndex==0 ? 0 : -1;
+    nextSourceIndex=0;
+
+loop:
+    if(inDirectMode) {
+directMode:
+        /*
+         * In Direct Mode, US-ASCII characters are encoded directly, i.e.,
+         * with their US-ASCII byte values.
+         * An ampersand starts Unicode (or "escape") Mode.
+         *
+         * In Direct Mode, only the sourceIndex is used.
+         */
+        byteIndex=0;
+        length=sourceLimit-source;
+        targetCapacity=targetLimit-target;
+        if(length>targetCapacity) {
+            length=targetCapacity;
+        }
+        while(length>0) {
+            b=*source++;
+            if(!isLegalIMAP(b)) {
+                /* illegal */
+                bytes[0]=b;
+                byteIndex=1;
+                nextSourceIndex=sourceIndex+1;
+                goto callback;
+            } else if(b!=AMPERSAND) {
+                /* write directly encoded character */
+                *target++=b;
+                if(offsets!=NULL) {
+                    *offsets++=sourceIndex++;
+                }
+            } else /* AMPERSAND */ {
+                /* switch to Unicode mode */
+                nextSourceIndex=++sourceIndex;
+                inDirectMode=FALSE;
+                byteIndex=0;
+                bits=0;
+                base64Counter=-1;
+                goto unicodeMode;
+            }
+            --length;
+        }
+        if(source<sourceLimit && target>=targetLimit) {
+            /* target is full */
+            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+        }
+    } else {
+unicodeMode:
+        /*
+         * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded.
+         * The base64 sequence must be terminated by a minus sign, which is consumed;
+         * any other byte outside the modified base64 alphabet is reported as illegal.
+         * US-ASCII must not be base64-ed.
+         *
+         * In Unicode Mode, the sourceIndex has the index to the start of the current
+         * base64 bytes, while nextSourceIndex is precisely parallel to source,
+         * keeping the index to the following byte.
+         * Note that in 2 out of 3 cases, UChars overlap within a base64 byte.
+         */
+        while(source<sourceLimit) {
+            if(target<targetLimit) {
+                bytes[byteIndex++]=b=*source++;
+                ++nextSourceIndex;
+                if(b>0x7e) {
+                    /* illegal - test other illegal US-ASCII values by base64Value==-3 */
+                    inDirectMode=TRUE;
+                    goto callback;
+                } else if((base64Value=FROM_BASE64_IMAP(b))>=0) {
+                    /* collect base64 bytes into UChars */
+                    switch(base64Counter) {
+                    case -1: /* -1 is immediately after the & */
+                    case 0:
+                        bits=base64Value;
+                        base64Counter=1;
+                        break;
+                    case 1:
+                    case 3:
+                    case 4:
+                    case 6:
+                        bits=(uint16_t)((bits<<6)|base64Value);
+                        ++base64Counter;
+                        break;
+                    case 2:
+                        c=(UChar)((bits<<4)|(base64Value>>2));
+                        if(isLegalIMAP(c)) {
+                            /* illegal */
+                            inDirectMode=TRUE;
+                            goto callback;
+                        }
+                        *target++=c;
+                        if(offsets!=NULL) {
+                            *offsets++=sourceIndex;
+                            sourceIndex=nextSourceIndex-1;
+                        }
+                        bytes[0]=b; /* keep this byte in case an error occurs */
+                        byteIndex=1;
+                        bits=(uint16_t)(base64Value&3);
+                        base64Counter=3;
+                        break;
+                    case 5:
+                        c=(UChar)((bits<<2)|(base64Value>>4));
+                        if(isLegalIMAP(c)) {
+                            /* illegal */
+                            inDirectMode=TRUE;
+                            goto callback;
+                        }
+                        *target++=c;
+                        if(offsets!=NULL) {
+                            *offsets++=sourceIndex;
+                            sourceIndex=nextSourceIndex-1;
+                        }
+                        bytes[0]=b; /* keep this byte in case an error occurs */
+                        byteIndex=1;
+                        bits=(uint16_t)(base64Value&15);
+                        base64Counter=6;
+                        break;
+                    case 7:
+                        c=(UChar)((bits<<6)|base64Value);
+                        if(isLegalIMAP(c)) {
+                            /* illegal */
+                            inDirectMode=TRUE;
+                            goto callback;
+                        }
+                        *target++=c;
+                        if(offsets!=NULL) {
+                            *offsets++=sourceIndex;
+                            sourceIndex=nextSourceIndex;
+                        }
+                        byteIndex=0;
+                        bits=0;
+                        base64Counter=0;
+                        break;
+                    default:
+                        /* will never occur */
+                        break;
+                    }
+                } else if(base64Value==-2) {
+                    /* minus sign terminates the base64 sequence */
+                    inDirectMode=TRUE;
+                    if(base64Counter==-1) {
+                        /* &- i.e. a minus immediately following an ampersand */
+                        *target++=AMPERSAND;
+                        if(offsets!=NULL) {
+                            *offsets++=sourceIndex-1;
+                        }
+                    } else {
+                        /* absorb the minus and leave the Unicode Mode */
+                        if(bits!=0 || (base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) {
+                            /* bits are illegally left over, a UChar is incomplete */
+                            /* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */
+                            goto callback;
+                        }
+                    }
+                    sourceIndex=nextSourceIndex;
+                    goto directMode;
+                } else {
+                    if(base64Counter==-1) {
+                        /* illegal: & immediately followed by something other than base64 or minus sign */
+                        /* include the ampersand in the reported sequence */
+                        --sourceIndex;
+                        bytes[0]=AMPERSAND;
+                        bytes[1]=b;
+                        byteIndex=2;
+                    }
+                    /* base64Value==-1 for characters that are illegal only in Unicode mode */
+                    /* base64Value==-3 for illegal characters */
+                    /* illegal */
+                    inDirectMode=TRUE;
+                    goto callback;
+                }
+            } else {
+                /* target is full */
+                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                break;
+            }
+        }
+    }
+endloop:
+
+    if(pArgs->flush && source>=sourceLimit) {
+        /* reset the state for the next conversion */
+        if(!inDirectMode && U_SUCCESS(*pErrorCode)) {
+            /* a character byte sequence remains incomplete - IMAP must end in ASCII/direct mode */
+            *pErrorCode=U_TRUNCATED_CHAR_FOUND;
+        }
+        cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */
+        cnv->toULength=0;
+    } else {
+        /* set the converter state back into UConverter */
+        cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits;
+        cnv->toULength=byteIndex;
+    }
+
+finish:
+    /* write back the updated pointers */
+    pArgs->source=(const char *)source;
+    pArgs->target=target;
+    pArgs->offsets=offsets;
+    return;
+
+callback:
+    /* call the callback function with all the preparations and post-processing */
+    /* update the arguments structure */
+    pArgs->source=(const char *)source;
+    pArgs->target=target;
+    pArgs->offsets=offsets;
+
+    /* copy the current bytes to invalidCharBuffer */
+    for(b=0; b<(uint8_t)byteIndex; ++b) {
+        cnv->invalidCharBuffer[b]=(char)bytes[b];
+    }
+    cnv->invalidCharLength=byteIndex;
+
+    /* set the converter state in UConverter to deal with the next character */
+    cnv->toUnicodeStatus=(uint32_t)inDirectMode<<24;
+    cnv->toULength=0;
+
+    /* call the callback function */
+    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+    cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, cnv->invalidCharLength, UCNV_ILLEGAL, pErrorCode);
+
+    /* get the converter state from UConverter */
+    {
+        uint32_t status=cnv->toUnicodeStatus;
+        inDirectMode=(UBool)((status>>24)&1);
+        base64Counter=(int8_t)(status>>16);
+        bits=(uint16_t)status;
+    }
+    byteIndex=cnv->toULength;
+
+    /* update target and deal with offsets if necessary */
+    offsets=ucnv_updateCallbackOffsets(offsets, pArgs->target-target, sourceIndex);
+    target=pArgs->target;
+
+    /* update the source pointer and index */
+    sourceIndex=nextSourceIndex+((const uint8_t *)pArgs->source-source);
+    source=(const uint8_t *)pArgs->source;
+
+    /*
+     * If the callback overflowed the target, then we need to
+     * stop here with an overflow indication.
+     */
+    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
+        goto endloop;
+    } else if(cnv->UCharErrorBufferLength>0) {
+        /* target is full */
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+        goto endloop;
+    } else if(U_FAILURE(*pErrorCode)) {
+        /* break on error */
+        cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */
+        cnv->toULength=0;
+        goto finish;
+    } else {
+        goto loop;
+    }
+}
+
+static void
+_IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+                            UErrorCode *pErrorCode) {
+    UConverter *cnv;
+    const UChar *source, *sourceLimit;
+    uint8_t *target, *targetLimit;
+    int32_t *offsets;
+
+    int32_t length, targetCapacity, sourceIndex;
+    UChar c;
+    uint8_t b;
+
+    /* UTF-7 state */
+    uint8_t bits;
+    int8_t base64Counter;
+    UBool inDirectMode;
+
+    /* set up the local pointers */
+    cnv=pArgs->converter;
+
+    /* set up the local pointers */
+    source=pArgs->source;
+    sourceLimit=pArgs->sourceLimit;
+    target=(uint8_t *)pArgs->target;
+    targetLimit=(uint8_t *)pArgs->targetLimit;
+    offsets=pArgs->offsets;
+
+    /* get the state machine state */
+    {
+        uint32_t status=cnv->fromUnicodeStatus;
+        inDirectMode=(UBool)((status>>24)&1);
+        base64Counter=(int8_t)(status>>16);
+        bits=(uint8_t)status;
+    }
+
+    /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
+    sourceIndex=0;
+
+    if(inDirectMode) {
+directMode:
+        length=sourceLimit-source;
+        targetCapacity=targetLimit-target;
+        if(length>targetCapacity) {
+            length=targetCapacity;
+        }
+        while(length>0) {
+            c=*source++;
+            /* encode 0x20..0x7e except '&' directly */
+            if(inSetDIMAP(c)) {
+                /* encode directly */
+                *target++=(uint8_t)c;
+                if(offsets!=NULL) {
+                    *offsets++=sourceIndex++;
+                }
+            } else if(c==AMPERSAND) {
+                /* output &- for & */
+                *target++=AMPERSAND;
+                if(target<targetLimit) {
+                    *target++=MINUS;
+                    if(offsets!=NULL) {
+                        *offsets++=sourceIndex;
+                        *offsets++=sourceIndex++;
+                    }
+                    /* realign length and targetCapacity */
+                    goto directMode;
+                } else {
+                    if(offsets!=NULL) {
+                        *offsets++=sourceIndex++;
+                    }
+                    cnv->charErrorBuffer[0]=MINUS;
+                    cnv->charErrorBufferLength=1;
+                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                    break;
+                }
+            } else {
+                /* un-read this character and switch to Unicode Mode */
+                --source;
+                *target++=AMPERSAND;
+                if(offsets!=NULL) {
+                    *offsets++=sourceIndex;
+                }
+                inDirectMode=FALSE;
+                base64Counter=0;
+                goto unicodeMode;
+            }
+            --length;
+        }
+        if(source<sourceLimit && target>=targetLimit) {
+            /* target is full */
+            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+        }
+    } else {
+unicodeMode:
+        while(source<sourceLimit) {
+            if(target<targetLimit) {
+                c=*source++;
+                if(isLegalIMAP(c)) {
+                    /* encode directly */
+                    inDirectMode=TRUE;
+
+                    /* trick: back out this character to make this easier */
+                    --source;
+
+                    /* terminate the base64 sequence */
+                    if(base64Counter!=0) {
+                        /* write remaining bits for the previous character */
+                        *target++=TO_BASE64_IMAP(bits);
+                        if(offsets!=NULL) {
+                            *offsets++=sourceIndex-1;
+                        }
+                    }
+                    /* need to terminate with a minus */
+                    if(target<targetLimit) {
+                        *target++=MINUS;
+                        if(offsets!=NULL) {
+                            *offsets++=sourceIndex-1;
+                        }
+                    } else {
+                        cnv->charErrorBuffer[0]=MINUS;
+                        cnv->charErrorBufferLength=1;
+                        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                        break;
+                    }
+                    goto directMode;
+                } else {
+                    /*
+                     * base64 this character:
+                     * Output 2 or 3 base64 bytes for the remaining bits of the previous character
+                     * and the bits of this character, each implicitly in UTF-16BE.
+                     *
+                     * Here, bits is an 8-bit variable because only 6 bits need to be kept from one
+                     * character to the next. The actual 2 or 4 bits are shifted to the left edge
+                     * of the 6-bits field 5..0 to make the termination of the base64 sequence easier.
+                     */
+                    switch(base64Counter) {
+                    case 0:
+                        b=(uint8_t)(c>>10);
+                        *target++=TO_BASE64_IMAP(b);
+                        if(target<targetLimit) {
+                            b=(uint8_t)((c>>4)&0x3f);
+                            *target++=TO_BASE64_IMAP(b);
+                            if(offsets!=NULL) {
+                                *offsets++=sourceIndex;
+                                *offsets++=sourceIndex++;
+                            }
+                        } else {
+                            if(offsets!=NULL) {
+                                *offsets++=sourceIndex++;
+                            }
+                            b=(uint8_t)((c>>4)&0x3f);
+                            cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
+                            cnv->charErrorBufferLength=1;
+                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                        }
+                        bits=(uint8_t)((c&15)<<2);
+                        base64Counter=1;
+                        break;
+                    case 1:
+                        b=(uint8_t)(bits|(c>>14));
+                        *target++=TO_BASE64_IMAP(b);
+                        if(target<targetLimit) {
+                            b=(uint8_t)((c>>8)&0x3f);
+                            *target++=TO_BASE64_IMAP(b);
+                            if(target<targetLimit) {
+                                b=(uint8_t)((c>>2)&0x3f);
+                                *target++=TO_BASE64_IMAP(b);
+                                if(offsets!=NULL) {
+                                    *offsets++=sourceIndex;
+                                    *offsets++=sourceIndex;
+                                    *offsets++=sourceIndex++;
+                                }
+                            } else {
+                                if(offsets!=NULL) {
+                                    *offsets++=sourceIndex;
+                                    *offsets++=sourceIndex++;
+                                }
+                                b=(uint8_t)((c>>2)&0x3f);
+                                cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
+                                cnv->charErrorBufferLength=1;
+                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                            }
+                        } else {
+                            if(offsets!=NULL) {
+                                *offsets++=sourceIndex++;
+                            }
+                            b=(uint8_t)((c>>8)&0x3f);
+                            cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
+                            b=(uint8_t)((c>>2)&0x3f);
+                            cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
+                            cnv->charErrorBufferLength=2;
+                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                        }
+                        bits=(uint8_t)((c&3)<<4);
+                        base64Counter=2;
+                        break;
+                    case 2:
+                        b=(uint8_t)(bits|(c>>12));
+                        *target++=TO_BASE64_IMAP(b);
+                        if(target<targetLimit) {
+                            b=(uint8_t)((c>>6)&0x3f);
+                            *target++=TO_BASE64_IMAP(b);
+                            if(target<targetLimit) {
+                                b=(uint8_t)(c&0x3f);
+                                *target++=TO_BASE64_IMAP(b);
+                                if(offsets!=NULL) {
+                                    *offsets++=sourceIndex;
+                                    *offsets++=sourceIndex;
+                                    *offsets++=sourceIndex++;
+                                }
+                            } else {
+                                if(offsets!=NULL) {
+                                    *offsets++=sourceIndex;
+                                    *offsets++=sourceIndex++;
+                                }
+                                b=(uint8_t)(c&0x3f);
+                                cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
+                                cnv->charErrorBufferLength=1;
+                                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                            }
+                        } else {
+                            if(offsets!=NULL) {
+                                *offsets++=sourceIndex++;
+                            }
+                            b=(uint8_t)((c>>6)&0x3f);
+                            cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b);
+                            b=(uint8_t)(c&0x3f);
+                            cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b);
+                            cnv->charErrorBufferLength=2;
+                            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                        }
+                        bits=0;
+                        base64Counter=0;
+                        break;
+                    default:
+                        /* will never occur */
+                        break;
+                    }
+                }
+            } else {
+                /* target is full */
+                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                break;
+            }
+        }
+    }
+
+    if(pArgs->flush && source>=sourceLimit) {
+        /* flush remaining bits to the target */
+        if(!inDirectMode) {
+            if(base64Counter!=0) {
+                if(target<targetLimit) {
+                    *target++=TO_BASE64_IMAP(bits);
+                    if(offsets!=NULL) {
+                        *offsets++=sourceIndex-1;
+                    }
+                } else {
+                    cnv->charErrorBuffer[cnv->charErrorBufferLength++]=TO_BASE64_IMAP(bits);
+                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                }
+            }
+            /* need to terminate with a minus */
+            if(target<targetLimit) {
+                *target++=MINUS;
+                if(offsets!=NULL) {
+                    *offsets++=sourceIndex-1;
+                }
+            } else {
+                cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
+                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+            }
+        }
+        /* reset the state for the next conversion */
+        cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */
+    } else {
+        /* set the converter state back into UConverter */
+        cnv->fromUnicodeStatus=
+            (cnv->fromUnicodeStatus&0xf0000000)|    /* keep version*/
+            ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits;
+    }
+
+    /* write back the updated pointers */
+    pArgs->source=source;
+    pArgs->target=(char *)target;
+    pArgs->offsets=offsets;
+    return;
+}
+
+static const UConverterImpl _IMAPImpl={
+    UCNV_IMAP_MAILBOX,
+
+    NULL,
+    NULL,
+
+    _UTF7Open,
+    NULL,
+    _UTF7Reset,
+
+    _IMAPToUnicodeWithOffsets,
+    _IMAPToUnicodeWithOffsets,
+    _IMAPFromUnicodeWithOffsets,
+    _IMAPFromUnicodeWithOffsets,
+    _UTF7GetNextUChar,
+
+    NULL,
+    NULL,
+    NULL /* we don't need writeSub() because we never call a callback at fromUnicode() */
+};
+
+static const UConverterStaticData _IMAPStaticData={
+    sizeof(UConverterStaticData),
+    "IMAP-mailbox-name",
+    0, /* TODO CCSID for UTF-7 */
+    UCNV_IBM, UCNV_IMAP_MAILBOX,
+    1, 4,
+    { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */
+    FALSE, FALSE,
+    0,
+    0,
+    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+const UConverterSharedData _IMAPData={
+    sizeof(UConverterSharedData), ~((uint32_t)0),
+    NULL, NULL, &_IMAPStaticData, FALSE, &_IMAPImpl,
+    0
+};
--- a/icu4c/source/common/unicode/ucnv.h
+++ b/icu4c/source/common/unicode/ucnv.h
@ -104,6 +104,7 @@ typedef enum {
    UCNV_UTF16,
    UCNV_UTF32,
    UCNV_CESU8,
+    UCNV_IMAP_MAILBOX,

    /* Number of converter types for which we have conversion routines. */
    UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
--- a/icu4c/source/data/mappings/convrtrs.txt
+++ b/icu4c/source/data/mappings/convrtrs.txt
@ -148,6 +148,14 @@ UTF32_OppositeEndian
 # For details about email headers see RFC 2047.
 UTF-7 { IANA* MIME* }           cp65000

+# IMAP-mailbox-name is an ICU-specific name for the encoding of IMAP mailbox names.
+# It is a substantially modified UTF-7 encoding. See the specification in:
+#
+# RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1
+# (http://www.ietf.org/rfc/rfc2060.txt)
+# Section 5.1.3.  Mailbox International Naming Convention
+IMAP-mailbox-name
+
 SCSU { IANA* }
 BOCU-1 { IANA* } csBOCU-1 { IANA }

--- a/icu4c/source/test/cintltst/nccbtst.c
+++ b/icu4c/source/test/cintltst/nccbtst.c
@ -1707,6 +1707,26 @@ static void TestSub(int32_t inputsize, int32_t outputsize)
        }
    }

+    log_verbose("Testing IMAP-mailbox-name toUnicode with substitute callbacks\n");
+    {
+        static const uint8_t bytes[]={
+         /* aDEL          a&AB~                         a&AB\x0c                      a&AB-                         a&AB.                         a&. */
+            0x61, 0x7f,   0x61, 0x26, 0x41, 0x42, 0x7e, 0x61, 0x26, 0x41, 0x42, 0x0c, 0x61, 0x26, 0x41, 0x42, 0x2d, 0x61, 0x26, 0x41, 0x42, 0x2e, 0x61, 0x26, 0x2e
+        };
+        static const UChar unicode[]={
+            0x61, 0xfffd, 0x61,       0xfffd,           0x61,       0xfffd,           0x61,       0xfffd,           0x61,       0xfffd,           0x61, 0xfffd
+        };
+        static const int32_t offsets[]={
+            0,    1,      2,          4,                7,          9,                12,         14,               17,         19,               22,   23
+        };
+
+        if(!testConvertToUnicode(bytes, ARRAY_LENGTH(bytes), unicode, ARRAY_LENGTH(unicode), "IMAP-mailbox-name", 
+                                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
+        ) {
+            log_err("IMAP-mailbox-name->u with substitute did not match.\n");
+        }
+    }
+
    log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
    {
        static const uint8_t
--- a/icu4c/source/test/cintltst/ncnvtst.c
+++ b/icu4c/source/test/cintltst/ncnvtst.c
@ -1772,11 +1772,16 @@ doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) {

 static void
 TestTruncated() {
-    struct {
+    static const struct {
        const char *cnvName;
        uint8_t bytes[8]; /* partial input bytes resulting in no output */
        int32_t length;
    } testCases[]={
+        { "IMAP-mailbox-name",  { 0x26 }, 1 }, /* & */
+        { "IMAP-mailbox-name",  { 0x26, 0x42 }, 2 }, /* &B */
+        { "IMAP-mailbox-name",  { 0x26, 0x42, 0x42 }, 3 }, /* &BB */
+        { "IMAP-mailbox-name",  { 0x26, 0x41, 0x41 }, 3 }, /* &AA */
+
        { "UTF-7",      { 0x2b, 0x42 }, 2 }, /* +B */
        { "UTF-8",      { 0xd1 }, 1 },

--- a/icu4c/source/test/cintltst/nucnvtst.c
+++ b/icu4c/source/test/cintltst/nucnvtst.c
@ -34,6 +34,7 @@ static void TestConverterTypesAndStarters(void);
 static void TestAmbiguous(void);
 static void TestSignatureDetection(void);
 static void TestUTF7(void);
+static void TestIMAP(void);
 static void TestUTF8(void);
 static void TestCESU8(void);
 static void TestUTF16(void);
@ -145,7 +146,9 @@ TestNextUChar(UConverter* cnv, const char* source, const char* limit, const uint
     while(s<limit) {
        s0=s;
        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
-        if(U_FAILURE(errorCode)) {
+        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
+            break; /* no more significant input */
+        } else if(U_FAILURE(errorCode)) {
            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
            break;
        } else if((uint32_t)(s-s0)!=*r || c!=*(r+1)) {
@ -210,6 +213,7 @@ void addTestNewConvert(TestNode** root)
   addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
   addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
   addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
+   addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
   addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
   addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
   addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
@ -404,7 +408,7 @@ static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,
        log_err("\n");
        log_err("Got  :     ");
        for(p=junkout;p<targ;p++) {
-          log_err("%d, ", junokout[p-junkout]);
+          log_err("%d,", junokout[p-junkout]);
        }
        log_err("\n");
        log_err("Expected:  ");
@ -880,7 +884,7 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
            27, 29, 32
        };
        static const int32_t fromUnicodeOffsets[] = {
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 9, 9, 10,
+            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
            11, 12, 12, 12, 13, 13, 13, 13, 14,
            15, 15,
            16, 16, 16, 17, 17, 17, 18, 18, 18
@ -906,19 +910,95 @@ static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
            31, 33, 36
        };
        static const int32_t fromUnicodeOffsetsR[] = {
-            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 9, 9, 10, 10, 10, 10, 11,
+            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
            11, 12, 12, 12, 13, 13, 13, 13, 14,
            15, 15,
            16, 16, 16, 17, 17, 17, 18, 18, 18
        };

-        	testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
+        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);

-        	testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
+        testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);

-        	testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
+        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);

-        	testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
+        testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
+    }
+
+    /*
+     * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
+     * modified according to RFC 2060,
+     * and supplemented with the one example in RFC 2060 itself.
+     */
+    {
+        static const uint8_t imap[] = {
+            /*  Hi Mom -&Jjo--!
+                A&ImIDkQ-.
+                &-
+                &ZeVnLIqe-
+                \
+                ~peter
+                /mail
+                /&ZeVnLIqe-
+                /&U,BTFw-
+            */
+            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
+            0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
+            0x26, 0x2d,
+            0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
+            0x5c,
+            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
+            0x2f, 0x6d, 0x61, 0x69, 0x6c,
+            0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
+            0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
+        };
+        static const UChar unicode[] = {
+            /*  Hi Mom -<WHITE SMILING FACE>-!
+                A<NOT IDENTICAL TO><ALPHA>.
+                &
+                [Japanese word "nihongo"]
+                \
+                ~peter
+                /mail
+                /<65e5, 672c, 8a9e>
+                /<53f0, 5317>
+            */
+            0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
+            0x41, 0x2262, 0x0391, 0x2e,
+            0x26,
+            0x65e5, 0x672c, 0x8a9e,
+            0x5c,
+            0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
+            0x2f, 0x6d, 0x61, 0x69, 0x6c,
+            0x2f, 0x65e5, 0x672c, 0x8a9e,
+            0x2f, 0x53f0, 0x5317
+        };
+        static const int32_t toUnicodeOffsets[] = {
+            0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
+            15, 17, 19, 24,
+            25,
+            28, 30, 33,
+            37,
+            38, 39, 40, 41, 42, 43,
+            44, 45, 46, 47, 48,
+            49, 51, 53, 56,
+            60, 62, 64
+        };
+        static const int32_t fromUnicodeOffsets[] = {
+            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
+            11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
+            15, 15,
+            16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
+            19,
+            20, 21, 22, 23, 24, 25,
+            26, 27, 28, 29, 30,
+            31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
+            35, 36, 36, 36, 37, 37, 37, 37, 37
+        };
+
+        testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
+
+        testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
    }

    /* Test UTF-8 bad data handling*/
@ -1224,6 +1304,7 @@ static void TestConverterTypesAndStarters()
    TestConverterType("x-iscii-de", UCNV_ISCII);
    TestConverterType("ascii", UCNV_US_ASCII);
    TestConverterType("utf-7", UCNV_UTF7);
+    TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
    TestConverterType("bocu-1", UCNV_BOCU1);
 }

@ -1595,6 +1676,50 @@ static TestUTF7() {
    ucnv_close(cnv);
 }

+void
+static TestIMAP() {
+    /* Checks ucnv_getNextUChar() on IMAP-mailbox-name input, the empty-source error, and the converter name. Test input: */
+    static const uint8_t in[]={
+        /* H - &Jjo- - ! &- &2AHcAQ- (one row of bytes per expected result below) */
+        0x48,
+        0x2d,
+        0x26, 0x4a, 0x6a, 0x6f,
+        0x2d, 0x2d,
+        0x21,
+        0x26, 0x2d,
+        0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
+    };
+
+    /* expected test results, consumed pairwise by TestNextUChar() */
+    static const uint32_t results[]={
+        /* number of bytes read, code point */
+        1, 0x48,
+        1, 0x2d,
+        4, 0x263a, /* <WHITE SMILING FACE> from "&Jjo" */
+        2, 0x2d, /* both 0x2d bytes are read, but only one '-' is produced */
+        1, 0x21,
+        2, 0x26, /* "&-" yields a literal '&' */
+        7, 0x10401 /* supplementary code point; only 7 of the 8 bytes are counted, the final 0x2d is not */
+    };
+
+    const char *cnvName;
+    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
+    UErrorCode errorCode=U_ZERO_ERROR;
+    UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
+    if(U_FAILURE(errorCode)) {
+        log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* shouldn't be a data err */
+        return;
+    }
+    TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
+    /* Test the condition when source >= sourceLimit: source is passed as its own limit, so the input is empty */
+    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
+    cnvName = ucnv_getName(cnv, &errorCode); /* the reported name must match the name used to open the converter */
+    if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
+        log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
+    }
+    ucnv_close(cnv);
+}
+
 void
 static TestUTF8() {
    /* test input */
@ -2596,7 +2721,9 @@ TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
     while(s<limit) {
        s0=s;
        c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
-        if(U_FAILURE(errorCode)) {
+        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
+            break; /* no more significant input */
+        } else if(U_FAILURE(errorCode)) {
            log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
            break;
        } else {
@ -3196,6 +3323,8 @@ TestRoundTrippingAllUTF(void){
        TestFullRoundtrip("UTF-7");
        log_verbose("Running exhaustive round trip test for UTF-7\n");
        TestFullRoundtrip("UTF-7,version=1");
+        log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
+        TestFullRoundtrip("IMAP-mailbox-name");
        log_verbose("Running exhaustive round trip test for GB18030\n");
        TestFullRoundtrip("GB18030");
    }