mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-21 12:30:06 +00:00
iconv: Add UTF-7-IMAP variant in utf-7.c
UTF-7-IMAP differs from UTF-7 in the following ways (see RFC 3501 [1] for reference): - The shift character is '&' instead of '+'. - There are no "optional direct characters", and the set of "direct characters" is different. - There is no implicit shift back to US-ASCII from BASE64; all BASE64 sequences MUST be terminated with '-'. [1]: https://datatracker.ietf.org/doc/html/rfc3501#section-5.1.3 Signed-off-by: Max Gautier <mg@max.gautier.name> Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
This commit is contained in:
parent
ef7b963280
commit
9df157b4ed
@ -94,6 +94,7 @@ EUC-TW EUC-TW Y UTF8
|
|||||||
GBK GBK Y UTF8
|
GBK GBK Y UTF8
|
||||||
BIG5HKSCS BIG5HKSCS Y UTF8
|
BIG5HKSCS BIG5HKSCS Y UTF8
|
||||||
UTF-7 UTF-7 N UTF8
|
UTF-7 UTF-7 N UTF8
|
||||||
|
UTF-7-IMAP UTF-7-IMAP N UTF8
|
||||||
IBM856 IBM856 N UTF8
|
IBM856 IBM856 N UTF8
|
||||||
IBM922 IBM922 Y UTF8
|
IBM922 IBM922 Y UTF8
|
||||||
IBM930 IBM930 N UTF8
|
IBM930 IBM930 N UTF8
|
||||||
|
@ -113,3 +113,7 @@ module INTERNAL UTF-32BE// UTF-32 1
|
|||||||
alias UTF7// UTF-7//
|
alias UTF7// UTF-7//
|
||||||
module UTF-7// INTERNAL UTF-7 1
|
module UTF-7// INTERNAL UTF-7 1
|
||||||
module INTERNAL UTF-7// UTF-7 1
|
module INTERNAL UTF-7// UTF-7 1
|
||||||
|
|
||||||
|
# from to module cost
|
||||||
|
module UTF-7-IMAP// INTERNAL UTF-7 1
|
||||||
|
module INTERNAL UTF-7-IMAP// UTF-7 1
|
||||||
|
1
iconvdata/testdata/UTF-7-IMAP
vendored
Normal file
1
iconvdata/testdata/UTF-7-IMAP
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
&EqASGxItEps- Amharic&AAoBDQ-esky Czech&AAo-Dansk Danish&AAo-English English&AAo-Suomi Finnish&AAo-Fran&AOc-ais French&AAo-Deutsch German&AAoDlQO7A7sDtwO9A7kDugOs- Greek&AAoF4gXRBegF2QXq- Hebrew&AAo-Italiano Italian&AAo-Norsk Norwegian&AAoEIARDBEEEQQQ6BDgEOQ- Russian&AAo-Espa&APE-ol Spanish&AAo-Svenska Swedish&AAoOIA4yDikOMg5EDhcOIg- Thai&AAo-T&APw-rk&AOc-e Turkish&AAo-Ti&Hr8-ng Vi&Hsc-t Vietnamese&AApl5Wcsip4- Japanese&AApOLWWH- Chinese&AArVXK4A- Korean&AAoACg-// Checking for correct handling of shift characters ('&-', '-') after base64 sequences&AArVXK4A-&-&AArVXK4A--&AAoACg-// Checking for correct handling of litteral '&-' and '-'&AAo----&-&--&AAoACg-// The last line of this file is missing the end-of-line terminator&AAo-// on purpose, in order to test that the conversion empties the bit buffer&AAo-// and shifts back to the initial state at the end of the conversion.&AAo-A&ImIDkQ-
|
32
iconvdata/testdata/UTF-7-IMAP..UTF8
vendored
Normal file
32
iconvdata/testdata/UTF-7-IMAP..UTF8
vendored
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
አማርኛ Amharic
|
||||||
|
česky Czech
|
||||||
|
Dansk Danish
|
||||||
|
English English
|
||||||
|
Suomi Finnish
|
||||||
|
Français French
|
||||||
|
Deutsch German
|
||||||
|
Ελληνικά Greek
|
||||||
|
עברית Hebrew
|
||||||
|
Italiano Italian
|
||||||
|
Norsk Norwegian
|
||||||
|
Русский Russian
|
||||||
|
Español Spanish
|
||||||
|
Svenska Swedish
|
||||||
|
ภาษาไทย Thai
|
||||||
|
Türkçe Turkish
|
||||||
|
Tiếng Việt Vietnamese
|
||||||
|
日本語 Japanese
|
||||||
|
中文 Chinese
|
||||||
|
한글 Korean
|
||||||
|
|
||||||
|
// Checking for correct handling of shift characters ('&', '-') after base64 sequences
|
||||||
|
한글&
|
||||||
|
한글-
|
||||||
|
|
||||||
|
// Checking for correct handling of litteral '&' and '-'
|
||||||
|
---&&-
|
||||||
|
|
||||||
|
// The last line of this file is missing the end-of-line terminator
|
||||||
|
// on purpose, in order to test that the conversion empties the bit buffer
|
||||||
|
// and shifts back to the initial state at the end of the conversion.
|
||||||
|
A≢Α
|
@ -33,11 +33,13 @@
|
|||||||
enum variant
|
enum variant
|
||||||
{
|
{
|
||||||
UTF7,
|
UTF7,
|
||||||
|
UTF_7_IMAP
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Must be in the same order as enum variant above. */
|
/* Must be in the same order as enum variant above. */
|
||||||
static const char names[] =
|
static const char names[] =
|
||||||
"UTF-7//\0"
|
"UTF-7//\0"
|
||||||
|
"UTF-7-IMAP//\0"
|
||||||
"\0";
|
"\0";
|
||||||
|
|
||||||
static uint32_t
|
static uint32_t
|
||||||
@ -45,6 +47,8 @@ shift_character (enum variant const var)
|
|||||||
{
|
{
|
||||||
if (var == UTF7)
|
if (var == UTF7)
|
||||||
return '+';
|
return '+';
|
||||||
|
else if (var == UTF_7_IMAP)
|
||||||
|
return '&';
|
||||||
else
|
else
|
||||||
abort ();
|
abort ();
|
||||||
}
|
}
|
||||||
@ -58,6 +62,9 @@ between (uint32_t const ch,
|
|||||||
|
|
||||||
/* The set of "direct characters":
|
/* The set of "direct characters":
|
||||||
A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
|
A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
|
||||||
|
FOR UTF-7-IMAP
|
||||||
|
A-Z a-z 0-9 ' ( ) , - . / : ? space
|
||||||
|
! " # $ % + * ; < = > @ [ \ ] ^ _ ` { | } ~
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
@ -71,6 +78,8 @@ isdirect (uint32_t ch, enum variant var)
|
|||||||
|| between (ch, ',', '/')
|
|| between (ch, ',', '/')
|
||||||
|| ch == ':' || ch == '?'
|
|| ch == ':' || ch == '?'
|
||||||
|| ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
|
|| ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
|
||||||
|
else if (var == UTF_7_IMAP)
|
||||||
|
return (ch != '&' && between (ch, ' ', '~'));
|
||||||
abort ();
|
abort ();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -124,6 +133,8 @@ base64 (unsigned int i, enum variant var)
|
|||||||
return '+';
|
return '+';
|
||||||
else if (i == 63 && var == UTF7)
|
else if (i == 63 && var == UTF7)
|
||||||
return '/';
|
return '/';
|
||||||
|
else if (i == 63 && var == UTF_7_IMAP)
|
||||||
|
return ',';
|
||||||
else
|
else
|
||||||
abort ();
|
abort ();
|
||||||
}
|
}
|
||||||
@ -308,7 +319,8 @@ gconv_end (struct __gconv_step *data)
|
|||||||
i = ch - '0' + 52; \
|
i = ch - '0' + 52; \
|
||||||
else if (ch == '+') \
|
else if (ch == '+') \
|
||||||
i = 62; \
|
i = 62; \
|
||||||
else if (ch == '/') \
|
else if ((var == UTF7 && ch == '/') \
|
||||||
|
|| (var == UTF_7_IMAP && ch == ',')) \
|
||||||
i = 63; \
|
i = 63; \
|
||||||
else \
|
else \
|
||||||
{ \
|
{ \
|
||||||
@ -316,8 +328,10 @@ gconv_end (struct __gconv_step *data)
|
|||||||
\
|
\
|
||||||
/* If accumulated data is nonzero, the input is invalid. */ \
|
/* If accumulated data is nonzero, the input is invalid. */ \
|
||||||
/* Also, partial UTF-16 characters are invalid. */ \
|
/* Also, partial UTF-16 characters are invalid. */ \
|
||||||
if (__builtin_expect (statep->__value.__wch != 0, 0) \
|
/* In IMAP variant, must be terminated by '-'. */ \
|
||||||
|| __builtin_expect ((statep->__count >> 3) <= 26, 0)) \
|
if (__glibc_unlikely (statep->__value.__wch != 0) \
|
||||||
|
|| __glibc_unlikely ((statep->__count >> 3) <= 26) \
|
||||||
|
|| __glibc_unlikely (var == UTF_7_IMAP && ch != '-')) \
|
||||||
{ \
|
{ \
|
||||||
STANDARD_FROM_LOOP_ERR_HANDLER ((statep->__count = 0, 1)); \
|
STANDARD_FROM_LOOP_ERR_HANDLER ((statep->__count = 0, 1)); \
|
||||||
} \
|
} \
|
||||||
@ -474,13 +488,15 @@ gconv_end (struct __gconv_step *data)
|
|||||||
else \
|
else \
|
||||||
{ \
|
{ \
|
||||||
/* base64 encoding active */ \
|
/* base64 encoding active */ \
|
||||||
if (isdirect (ch, var)) \
|
if ((var == UTF_7_IMAP && ch == '&') || isdirect (ch, var)) \
|
||||||
{ \
|
{ \
|
||||||
/* deactivate base64 encoding */ \
|
/* deactivate base64 encoding */ \
|
||||||
size_t count; \
|
size_t count; \
|
||||||
\
|
\
|
||||||
count = ((statep->__count & 0x18) >= 0x10) \
|
count = ((statep->__count & 0x18) >= 0x10) \
|
||||||
+ needs_explicit_shift (ch) + 1; \
|
+ (var == UTF_7_IMAP || needs_explicit_shift (ch)) \
|
||||||
|
+ (var == UTF_7_IMAP && ch == '&') \
|
||||||
|
+ 1; \
|
||||||
if (__glibc_unlikely (outptr + count > outend)) \
|
if (__glibc_unlikely (outptr + count > outend)) \
|
||||||
{ \
|
{ \
|
||||||
result = __GCONV_FULL_OUTPUT; \
|
result = __GCONV_FULL_OUTPUT; \
|
||||||
@ -489,9 +505,11 @@ gconv_end (struct __gconv_step *data)
|
|||||||
\
|
\
|
||||||
if ((statep->__count & 0x18) >= 0x10) \
|
if ((statep->__count & 0x18) >= 0x10) \
|
||||||
*outptr++ = base64 ((statep->__count >> 3) & ~3, var); \
|
*outptr++ = base64 ((statep->__count >> 3) & ~3, var); \
|
||||||
if (needs_explicit_shift (ch)) \
|
if (var == UTF_7_IMAP || needs_explicit_shift (ch)) \
|
||||||
*outptr++ = '-'; \
|
*outptr++ = '-'; \
|
||||||
*outptr++ = (unsigned char) ch; \
|
*outptr++ = (unsigned char) ch; \
|
||||||
|
if (var == UTF_7_IMAP && ch == '&') \
|
||||||
|
*outptr++ = '-'; \
|
||||||
statep->__count = 0; \
|
statep->__count = 0; \
|
||||||
} \
|
} \
|
||||||
else \
|
else \
|
||||||
|
Loading…
Reference in New Issue
Block a user