mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-21 12:30:06 +00:00
iconv: Add UTF-7-IMAP variant in utf-7.c
UTF-7-IMAP differs from UTF-7 in the following ways (see RFC 3501[1] for reference): - The shift character is '&' instead of '+' - There are no "optional direct characters" and the "direct characters" set is different - There is no implicit shift back to US-ASCII from BASE64; all BASE64 sequences MUST be terminated with '-' [1]: https://datatracker.ietf.org/doc/html/rfc3501#section-5.1.3 Signed-off-by: Max Gautier <mg@max.gautier.name> Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
This commit is contained in:
parent
ef7b963280
commit
9df157b4ed
@ -94,6 +94,7 @@ EUC-TW EUC-TW Y UTF8
|
||||
GBK GBK Y UTF8
|
||||
BIG5HKSCS BIG5HKSCS Y UTF8
|
||||
UTF-7 UTF-7 N UTF8
|
||||
UTF-7-IMAP UTF-7-IMAP N UTF8
|
||||
IBM856 IBM856 N UTF8
|
||||
IBM922 IBM922 Y UTF8
|
||||
IBM930 IBM930 N UTF8
|
||||
|
@ -113,3 +113,7 @@ module INTERNAL UTF-32BE// UTF-32 1
|
||||
alias UTF7// UTF-7//
|
||||
module UTF-7// INTERNAL UTF-7 1
|
||||
module INTERNAL UTF-7// UTF-7 1
|
||||
|
||||
# from to module cost
|
||||
module UTF-7-IMAP// INTERNAL UTF-7 1
|
||||
module INTERNAL UTF-7-IMAP// UTF-7 1
|
||||
|
1
iconvdata/testdata/UTF-7-IMAP
vendored
Normal file
1
iconvdata/testdata/UTF-7-IMAP
vendored
Normal file
@ -0,0 +1 @@
|
||||
&EqASGxItEps- Amharic&AAoBDQ-esky Czech&AAo-Dansk Danish&AAo-English English&AAo-Suomi Finnish&AAo-Fran&AOc-ais French&AAo-Deutsch German&AAoDlQO7A7sDtwO9A7kDugOs- Greek&AAoF4gXRBegF2QXq- Hebrew&AAo-Italiano Italian&AAo-Norsk Norwegian&AAoEIARDBEEEQQQ6BDgEOQ- Russian&AAo-Espa&APE-ol Spanish&AAo-Svenska Swedish&AAoOIA4yDikOMg5EDhcOIg- Thai&AAo-T&APw-rk&AOc-e Turkish&AAo-Ti&Hr8-ng Vi&Hsc-t Vietnamese&AApl5Wcsip4- Japanese&AApOLWWH- Chinese&AArVXK4A- Korean&AAoACg-// Checking for correct handling of shift characters ('&-', '-') after base64 sequences&AArVXK4A-&-&AArVXK4A--&AAoACg-// Checking for correct handling of litteral '&-' and '-'&AAo----&-&--&AAoACg-// The last line of this file is missing the end-of-line terminator&AAo-// on purpose, in order to test that the conversion empties the bit buffer&AAo-// and shifts back to the initial state at the end of the conversion.&AAo-A&ImIDkQ-
|
32
iconvdata/testdata/UTF-7-IMAP..UTF8
vendored
Normal file
32
iconvdata/testdata/UTF-7-IMAP..UTF8
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
አማርኛ Amharic
|
||||
česky Czech
|
||||
Dansk Danish
|
||||
English English
|
||||
Suomi Finnish
|
||||
Français French
|
||||
Deutsch German
|
||||
Ελληνικά Greek
|
||||
עברית Hebrew
|
||||
Italiano Italian
|
||||
Norsk Norwegian
|
||||
Русский Russian
|
||||
Español Spanish
|
||||
Svenska Swedish
|
||||
ภาษาไทย Thai
|
||||
Türkçe Turkish
|
||||
Tiếng Việt Vietnamese
|
||||
日本語 Japanese
|
||||
中文 Chinese
|
||||
한글 Korean
|
||||
|
||||
// Checking for correct handling of shift characters ('&', '-') after base64 sequences
|
||||
한글&
|
||||
한글-
|
||||
|
||||
// Checking for correct handling of litteral '&' and '-'
|
||||
---&&-
|
||||
|
||||
// The last line of this file is missing the end-of-line terminator
|
||||
// on purpose, in order to test that the conversion empties the bit buffer
|
||||
// and shifts back to the initial state at the end of the conversion.
|
||||
A≢Α
|
@ -33,11 +33,13 @@
|
||||
enum variant
|
||||
{
|
||||
UTF7,
|
||||
UTF_7_IMAP
|
||||
};
|
||||
|
||||
/* Must be in the same order as enum variant above. */
|
||||
static const char names[] =
|
||||
"UTF-7//\0"
|
||||
"UTF-7-IMAP//\0"
|
||||
"\0";
|
||||
|
||||
static uint32_t
|
||||
@ -45,6 +47,8 @@ shift_character (enum variant const var)
|
||||
{
|
||||
if (var == UTF7)
|
||||
return '+';
|
||||
else if (var == UTF_7_IMAP)
|
||||
return '&';
|
||||
else
|
||||
abort ();
|
||||
}
|
||||
@ -58,6 +62,9 @@ between (uint32_t const ch,
|
||||
|
||||
/* The set of "direct characters":
|
||||
A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
|
||||
For UTF-7-IMAP (the list above is for UTF-7), every printable ASCII character except '&':
|
||||
A-Z a-z 0-9 ' ( ) , - . / : ? space
|
||||
! " # $ % + * ; < = > @ [ \ ] ^ _ ` { | } ~
|
||||
*/
|
||||
|
||||
static bool
|
||||
@ -71,6 +78,8 @@ isdirect (uint32_t ch, enum variant var)
|
||||
|| between (ch, ',', '/')
|
||||
|| ch == ':' || ch == '?'
|
||||
|| ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
|
||||
else if (var == UTF_7_IMAP)
|
||||
return (ch != '&' && between (ch, ' ', '~'));
|
||||
abort ();
|
||||
}
|
||||
|
||||
@ -124,6 +133,8 @@ base64 (unsigned int i, enum variant var)
|
||||
return '+';
|
||||
else if (i == 63 && var == UTF7)
|
||||
return '/';
|
||||
else if (i == 63 && var == UTF_7_IMAP)
|
||||
return ',';
|
||||
else
|
||||
abort ();
|
||||
}
|
||||
@ -308,7 +319,8 @@ gconv_end (struct __gconv_step *data)
|
||||
i = ch - '0' + 52; \
|
||||
else if (ch == '+') \
|
||||
i = 62; \
|
||||
else if (ch == '/') \
|
||||
else if ((var == UTF7 && ch == '/') \
|
||||
|| (var == UTF_7_IMAP && ch == ',')) \
|
||||
i = 63; \
|
||||
else \
|
||||
{ \
|
||||
@ -316,8 +328,10 @@ gconv_end (struct __gconv_step *data)
|
||||
\
|
||||
/* If accumulated data is nonzero, the input is invalid. */ \
|
||||
/* Also, partial UTF-16 characters are invalid. */ \
|
||||
if (__builtin_expect (statep->__value.__wch != 0, 0) \
|
||||
|| __builtin_expect ((statep->__count >> 3) <= 26, 0)) \
|
||||
/* In IMAP variant, must be terminated by '-'. */ \
|
||||
if (__glibc_unlikely (statep->__value.__wch != 0) \
|
||||
|| __glibc_unlikely ((statep->__count >> 3) <= 26) \
|
||||
|| __glibc_unlikely (var == UTF_7_IMAP && ch != '-')) \
|
||||
{ \
|
||||
STANDARD_FROM_LOOP_ERR_HANDLER ((statep->__count = 0, 1)); \
|
||||
} \
|
||||
@ -474,13 +488,15 @@ gconv_end (struct __gconv_step *data)
|
||||
else \
|
||||
{ \
|
||||
/* base64 encoding active */ \
|
||||
if (isdirect (ch, var)) \
|
||||
if ((var == UTF_7_IMAP && ch == '&') || isdirect (ch, var)) \
|
||||
{ \
|
||||
/* deactivate base64 encoding */ \
|
||||
size_t count; \
|
||||
\
|
||||
count = ((statep->__count & 0x18) >= 0x10) \
|
||||
+ needs_explicit_shift (ch) + 1; \
|
||||
+ (var == UTF_7_IMAP || needs_explicit_shift (ch)) \
|
||||
+ (var == UTF_7_IMAP && ch == '&') \
|
||||
+ 1; \
|
||||
if (__glibc_unlikely (outptr + count > outend)) \
|
||||
{ \
|
||||
result = __GCONV_FULL_OUTPUT; \
|
||||
@ -489,9 +505,11 @@ gconv_end (struct __gconv_step *data)
|
||||
\
|
||||
if ((statep->__count & 0x18) >= 0x10) \
|
||||
*outptr++ = base64 ((statep->__count >> 3) & ~3, var); \
|
||||
if (needs_explicit_shift (ch)) \
|
||||
if (var == UTF_7_IMAP || needs_explicit_shift (ch)) \
|
||||
*outptr++ = '-'; \
|
||||
*outptr++ = (unsigned char) ch; \
|
||||
if (var == UTF_7_IMAP && ch == '&') \
|
||||
*outptr++ = '-'; \
|
||||
statep->__count = 0; \
|
||||
} \
|
||||
else \
|
||||
|
Loading…
Reference in New Issue
Block a user