intl: Handle translation output codesets with suffixes [BZ #26383]

Commit 91927b7c76 (Rewrite iconv option parsing [BZ #19519]) did not
handle cases where the output codeset for translations (via the `gettext'
family of functions) might have a caller-specified encoding suffix such as
TRANSLIT or IGNORE.  This led to a regression where translations did not
work when the codeset had a suffix.

This commit fixes the above issue by parsing any suffixes passed to
__dcigettext and adds two new test cases to intl/tst-codeset.c to
verify correct behaviour.  The iconv-internal function __gconv_create_spec
and the formerly static iconv-internal function gconv_destroy_spec (now
renamed __gconv_destroy_spec) are visible internally within glibc and used
in intl/dcigettext.c.
This commit is contained in:
Arjun Shankar 2020-09-25 14:47:06 +02:00 committed by Fangrui Song
parent 804887a0c8
commit 453aafef16
8 changed files with 60 additions and 57 deletions

View File

@ -6,7 +6,9 @@ libc {
GLIBC_PRIVATE {
# functions shared with iconv program
__gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db;
__gconv_open; __gconv_create_spec;
# functions used elsewhere in glibc
__gconv_open; __gconv_create_spec; __gconv_destroy_spec;
# function used by the gconv modules
__gconv_transliterate;

View File

@ -216,3 +216,13 @@ out:
return ret;
}
libc_hidden_def (__gconv_create_spec)
/* Release the two charset name strings (fromcode and tocode) held in
   *CONV_SPEC.  The gconv_spec structure itself is not freed.  */
void
__gconv_destroy_spec (struct gconv_spec *conv_spec)
{
  free (conv_spec->tocode);
  free (conv_spec->fromcode);
}
libc_hidden_def (__gconv_destroy_spec)

View File

@ -48,33 +48,6 @@
#define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE"
/* Build a conversion specification from the charset names FROMCODE and TOCODE
   and store it in *CONV_SPEC for later use by __gconv_open.  The charset names
   may carry options in the form of suffixes that alter the conversion, e.g.
   "ISO-10646/UTF-8/TRANSLIT".  Suffix options in FROMCODE are ignored and
   truncated; suffix options in TOCODE are processed and truncated.  A
   supported suffix option ("TRANSLIT" or "IGNORE") found in TOCODE causes the
   corresponding flag in *CONV_SPEC to be set to true.  Unrecognized suffix
   options are silently discarded.  Returns CONV_SPEC on success and NULL on
   failure.  */
struct gconv_spec *
__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
const char *tocode);
libc_hidden_proto (__gconv_create_spec)
/* Release the heap memory that __gconv_create_spec allocated for *CONV_SPEC,
   namely the fromcode and tocode strings.  The structure itself is left
   untouched.  */
static void __attribute__ ((unused))
gconv_destroy_spec (struct gconv_spec *conv_spec)
{
  free (conv_spec->tocode);
  free (conv_spec->fromcode);
}
/* This function copies in-order, characters from the source 's' that are
either alpha-numeric or one in one of these: "_-.,:/" - into the destination
'wp' while dropping all other characters. In the process, it converts all

View File

@ -170,6 +170,27 @@ extern int __gconv_open (struct gconv_spec *conv_spec,
__gconv_t *handle, int flags);
libc_hidden_proto (__gconv_open)
/* Build a conversion specification from the charset names FROMCODE and TOCODE
   and store it in *CONV_SPEC for later use by __gconv_open.  The charset names
   may carry options in the form of suffixes that alter the conversion, e.g.
   "ISO-10646/UTF-8/TRANSLIT".  Suffix options in FROMCODE are ignored and
   truncated; suffix options in TOCODE are processed and truncated.  A
   supported suffix option ("TRANSLIT" or "IGNORE") found in TOCODE causes the
   corresponding flag in *CONV_SPEC to be set to true.  Unrecognized suffix
   options are silently discarded.  Returns CONV_SPEC on success and NULL on
   failure.  Heap memory allocated by this function is freed by
   __gconv_destroy_spec.  */
extern struct gconv_spec *
__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
const char *tocode);
libc_hidden_proto (__gconv_create_spec)
/* Free the heap memory allocated by __gconv_create_spec for *CONV_SPEC, i.e.
   its fromcode and tocode strings.  The gconv_spec structure itself is not
   freed.  */
extern void
__gconv_destroy_spec (struct gconv_spec *conv_spec);
libc_hidden_proto (__gconv_destroy_spec)
/* Free resources associated with transformation descriptor CD. */
extern int __gconv_close (__gconv_t cd)
attribute_hidden;

View File

@ -39,7 +39,7 @@ iconv_open (const char *tocode, const char *fromcode)
int res = __gconv_open (&conv_spec, &cd, 0);
gconv_destroy_spec (&conv_spec);
__gconv_destroy_spec (&conv_spec);
if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK)
{

View File

@ -184,7 +184,7 @@ main (int argc, char *argv[])
/* Let's see whether we have these coded character sets. */
res = __gconv_open (&conv_spec, &cd, 0);
gconv_destroy_spec (&conv_spec);
__gconv_destroy_spec (&conv_spec);
if (res != __GCONV_OK)
{

View File

@ -1121,15 +1121,18 @@ _nl_find_msg (struct loaded_l10nfile *domain_file,
# ifdef _LIBC
struct gconv_spec conv_spec
= { .fromcode = norm_add_slashes (charset, ""),
.tocode = norm_add_slashes (outcharset, ""),
/* We always want to use transliteration. */
.translit = true,
.ignore = false
};
struct gconv_spec conv_spec;
__gconv_create_spec (&conv_spec, charset, outcharset);
/* We always want to use transliteration. */
conv_spec.translit = true;
int r = __gconv_open (&conv_spec, &convd->conv,
GCONV_AVOID_NOCONV);
__gconv_destroy_spec (&conv_spec);
if (__builtin_expect (r != __GCONV_OK, 0))
{
/* If the output encoding is the same there is

View File

@ -22,13 +22,11 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <support/check.h>
static int
do_test (void)
{
char *s;
int result = 0;
unsetenv ("LANGUAGE");
unsetenv ("OUTPUT_CHARSET");
setlocale (LC_ALL, "de_DE.ISO-8859-1");
@ -36,25 +34,21 @@ do_test (void)
bindtextdomain ("codeset", OBJPFX "domaindir");
/* Here we expect output in ISO-8859-1. */
s = gettext ("cheese");
if (strcmp (s, "K\344se"))
{
printf ("call 1 returned: %s\n", s);
result = 1;
}
bind_textdomain_codeset ("codeset", "UTF-8");
TEST_COMPARE_STRING (gettext ("cheese"), "K\344se");
/* Here we expect output in UTF-8. */
s = gettext ("cheese");
if (strcmp (s, "K\303\244se"))
{
printf ("call 2 returned: %s\n", s);
result = 1;
}
bind_textdomain_codeset ("codeset", "UTF-8");
TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se");
return result;
/* `a with umlaut' is transliterated to `ae'. */
bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT");
TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");
/* Transliteration also works by default even if not set. */
bind_textdomain_codeset ("codeset", "ASCII");
TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");
return 0;
}
#define TEST_FUNCTION do_test ()
#include "../test-skeleton.c"
#include <support/test-driver.c>