2000-11-27  Ulrich Drepper  <drepper@redhat.com>

	* catgets/Makefile (test1.cat): Set LC_ALL, LOCPATH, and GCONV_PATH
	for gencat run.
	(libc.cat): Likewise.
	* catgets/gencat.c: Implement handling of message catalogs encoded
	with stateful character sets.
	Based on a patch by Shinya Hanataka <hanataka@abyss.rim.or.jp>.
This commit is contained in:
Ulrich Drepper 2000-11-27 08:58:12 +00:00
parent ff8beceed1
commit d2defdc4a2
4 changed files with 274 additions and 38 deletions

View File

@ -1,3 +1,12 @@
2000-11-27 Ulrich Drepper <drepper@redhat.com>
* catgets/Makefile (test1.cat): Set LC_ALL, LOCPATH, and GCONV_PATH
for gencat run.
(libc.cat): Likewise.
* catgets/gencat.c: Implement handling of message catalogs encoded
with stateful character sets.
Based on a patch by Shinya Hanataka <hanataka@abyss.rim.or.jp>.
2000-11-26 Ulrich Drepper <drepper@redhat.com>
* sysdeps/unix/opendir.c (__opendir): Add cast to avoid warning.

16
NEWS
View File

@ -1,4 +1,4 @@
GNU C Library NEWS -- history of user-visible changes. 2000-08-13
GNU C Library NEWS -- history of user-visible changes. 2000-11-27
Copyright (C) 1992-1999, 2000 Free Software Foundation, Inc.
See the end for copying conditions.
@ -7,6 +7,20 @@ Please send GNU C library bug reports using the `glibcbug' script to
<bugs@gnu.org>. Questions and suggestions should be send to
<bug-glibc@gnu.org>.
Version 2.2.1
* The gencat program now parses the input file according to the charset
selected by the LC_CTYPE category. This is important for stateful
character sets. To make generating catalogs easier there is a way
to overwrite the charset selected by the locale: before the first
message or $ quote line the catalog can contain a line like
$ codeset=ISO-8859-2
to select the charset (ISO-8859-2 in this case).
Implemented by Shinya Hanataka and Ulrich Drepper.
Version 2.2
* Greg McGary added runtime support for bounds checking using gcc's

View File

@ -53,10 +53,13 @@ tests: $(objpfx)de/libc.cat $(objpfx)test1.cat
# This test just checks whether the program produces any error or not.
# The result is not tested.
$(objpfx)test1.cat: test1.msg $(objpfx)gencat
LC_ALL=hr_HR.ISO-8859-2 LOCPATH=$(common-objpfx)localedata \
GCONV_PATH=$(common-objpfx)iconvdata \
$(built-program-cmd) -H $(objpfx)test1.h $@ $<
$(objpfx)de/libc.cat: $(objpfx)de.msg $(objpfx)gencat
$(make-target-directory)
$(built-program-cmd) $@ $<
LC_ALL=de_DE.ISO-8859-1 LOCPATH=$(common-objpfx)localedata \
GCONV_PATH=$(common-objpfx)iconvdata $(built-program-cmd) $@ $<
$(objpfx)tst-catgets.out: $(objpfx)de/libc.cat
# Generate a non-simple input file.

View File

@ -22,11 +22,14 @@
#endif
#include <argp.h>
#include <assert.h>
#include <ctype.h>
#include <endian.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <iconv.h>
#include <langinfo.h>
#include <locale.h>
#include <libintl.h>
#include <limits.h>
@ -37,6 +40,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include "version.h"
@ -79,7 +83,7 @@ struct catalog
struct set_list *all_sets;
struct set_list *current_set;
size_t total_messages;
char quote_char;
wint_t quote_char;
int last_set;
struct obstack mem_pool;
@ -137,6 +141,8 @@ static struct argp argp =
/* Wrapper functions with error checking for standard functions. */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);
extern char *xstrdup (const char *);
/* Prototypes for local functions. */
static void error_print (void);
@ -145,9 +151,11 @@ static struct catalog *read_input_file (struct catalog *current,
static void write_out (struct catalog *result, const char *output_name,
const char *header_name);
static struct set_list *find_set (struct catalog *current, int number);
static void normalize_line (const char *fname, size_t line, char *string,
char quote_char);
static void normalize_line (const char *fname, size_t line, iconv_t cd,
wchar_t *string, wchar_t quote_char);
static void read_old (struct catalog *catalog, const char *file_name);
static int open_conversion (const char *codesetp, iconv_t *cd_towcp,
iconv_t *cd_tombp);
int
@ -260,6 +268,11 @@ read_input_file (struct catalog *current, const char *fname)
char *buf;
size_t len;
size_t line_number;
wchar_t *wbuf;
size_t wbufsize;
iconv_t cd_towc = (iconv_t) -1;
iconv_t cd_tomb = (iconv_t) -1;
char *codeset = NULL;
if (strcmp (fname, "-") == 0 || strcmp (fname, "/dev/stdin") == 0)
{
@ -289,6 +302,10 @@ read_input_file (struct catalog *current, const char *fname)
buf = NULL;
len = 0;
line_number = 0;
wbufsize = 1024;
wbuf = (wchar_t *) xmalloc (wbufsize);
while (!feof (fp))
{
int continued;
@ -328,7 +345,29 @@ read_input_file (struct catalog *current, const char *fname)
if (this_line[0] == '$')
{
if (isblank (this_line[1]))
{
int cnt = 1;
while (isblank (this_line[cnt]))
++cnt;
if (strncmp (&this_line[cnt], "codeset=", 8) != 0)
/* This is a comment line. Do nothing. */;
else if (codeset != NULL)
/* Ignore multiple codeset. */;
else
{
int start = cnt + 8;
cnt = start;
while (this_line[cnt] != '\0' && !isspace (this_line[cnt]))
++cnt;
if (cnt != start)
{
int len = cnt - start;
codeset = xmalloc (len + 1);
*((char *) mempcpy (codeset, &this_line[start], len))
= '\0';
}
}
}
else if (strncmp (&this_line[1], "set", 3) == 0)
{
int cnt = sizeof ("set");
@ -470,12 +509,44 @@ this is the first definition"));
}
else if (strncmp (&this_line[1], "quote", 5) == 0)
{
int cnt = sizeof ("quote");
char buf[2];
char *bufptr;
size_t buflen;
char *wbufptr;
size_t wbuflen;
int cnt;
cnt = sizeof ("quote");
while (isspace (this_line[cnt]))
++cnt;
/* We need the conversion. */
if (cd_towc == (iconv_t) -1
&& open_conversion (codeset, &cd_towc, &cd_tomb) != 0)
/* Something is wrong. */
goto out;
/* Yes, the quote char can be '\0'; this means no quote
char. */
current->quote_char = this_line[cnt];
char. The function using the information works on
wide characters so we have to convert it here. */
buf[0] = this_line[cnt];
buf[1] = '\0';
bufptr = buf;
buflen = 2;
wbufptr = (char *) wbuf;
wbuflen = wbufsize;
/* Flush the state. */
iconv (cd_towc, NULL, NULL, NULL, NULL);
iconv (cd_towc, &bufptr, &buflen, &wbufptr, &wbuflen);
if (buflen != 0 || (wchar_t *) wbufptr != &wbuf[2])
error_at_line (0, 0, fname, start_line,
gettext ("invalid quote character"));
else
/* Use the converted wide character. */
current->quote_char = wbuf[0];
}
else
{
@ -568,15 +639,92 @@ duplicated message identifier"));
if (message_number != 0)
{
char *inbuf;
size_t inlen;
char *outbuf;
size_t outlen;
struct message_list *newp;
size_t this_line_len = strlen (this_line) + 1;
/* We need the conversion. */
if (cd_towc == (iconv_t) -1
&& open_conversion (codeset, &cd_towc, &cd_tomb) != 0)
/* Something is wrong. */
goto out;
/* Convert to a wide character string. We have to
interpret escape sequences which will be impossible
without doing the conversion if the codeset of the
message is stateful. */
while (1)
{
inbuf = this_line;
inlen = this_line_len;
outbuf = (char *) wbuf;
outlen = wbufsize;
/* Flush the state. */
iconv (cd_towc, NULL, NULL, NULL, NULL);
iconv (cd_towc, &inbuf, &inlen, &outbuf, &outlen);
if (inlen == 0)
{
/* The string is converted. */
assert (outlen < wbufsize);
assert (wbuf[(wbufsize - outlen) / sizeof (wchar_t) - 1]
== L'\0');
break;
}
if (outlen != 0)
{
/* Something is wrong with this string, we ignore it. */
error_at_line (0, 0, fname, start_line, gettext ("\
invalid character: message ignored"));
goto ignore;
}
/* The output buffer is too small. */
wbufsize *= 2;
wbuf = (wchar_t *) xrealloc (wbuf, wbufsize);
}
used = 1; /* Yes, we use the line. */
/* Strip quote characters, change escape sequences into
correct characters etc. */
normalize_line (fname, start_line, this_line,
normalize_line (fname, start_line, cd_towc, wbuf,
current->quote_char);
/* Now the string is free of escape sequences. Convert it
back into a multibyte character string. First free the
memory allocated for the original string. */
obstack_free (&current->mem_pool, this_line);
/* Now fill in the new string. It should never happen that
the replaced string is longer than the original. */
inbuf = (char *) wbuf;
inlen = (wcslen (wbuf) + 1) * sizeof (wchar_t);
outlen = obstack_room (&current->mem_pool);
start_line = (char *) obstack_alloc (&current->mem_pool, outlen);
outbuf = start_line;
/* Flush the state. */
iconv (cd_tomb, NULL, NULL, NULL, NULL);
iconv (cd_tomb, &inbuf, &inlen, &outbuf, &outlen);
if (inlen != 0)
{
error_at_line (0, 0, fname, start_line,
gettext ("invalid line"));
goto ignore;
}
assert (outbuf[-1] == '\0');
/* Free the memory in the obstack we don't use. */
obstack_free (&current->mem_pool, outbuf);
newp = (struct message_list *) xmalloc (sizeof (*newp));
newp->number = message_number;
newp->message = this_line;
@ -625,11 +773,20 @@ duplicated message identifier"));
gettext ("malformed line ignored"));
}
ignore:
/* We can save the memory for the line if it was not used. */
if (!used)
obstack_free (&current->mem_pool, this_line);
}
/* Close the conversion modules. */
iconv_close (cd_towc);
iconv_close (cd_tomb);
free (codeset);
out:
free (wbuf);
if (fp != stdin)
fclose (fp);
return current;
@ -895,13 +1052,14 @@ find_set (struct catalog *current, int number)
/* Normalize given string *in*place* by processing escape sequences
and quote characters. */
static void
normalize_line (const char *fname, size_t line, char *string, char quote_char)
normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
wchar_t quote_char)
{
int is_quoted;
char *rp = string;
char *wp = string;
wchar_t *rp = string;
wchar_t *wp = string;
if (quote_char != '\0' && *rp == quote_char)
if (quote_char != L'\0' && *rp == quote_char)
{
is_quoted = 1;
++rp;
@ -909,58 +1067,83 @@ normalize_line (const char *fname, size_t line, char *string, char quote_char)
else
is_quoted = 0;
while (*rp != '\0')
while (*rp != L'\0')
if (*rp == quote_char)
/* We simply end the string when we find the first time an
not-escaped quote character. */
break;
else if (*rp == '\\')
else if (*rp == L'\\')
{
++rp;
if (quote_char != '\0' && *rp == quote_char)
if (quote_char != L'\0' && *rp == quote_char)
/* This is an extension to XPG. */
*wp++ = *rp++;
else
/* Recognize escape sequences. */
switch (*rp)
{
case 'n':
*wp++ = '\n';
case L'n':
*wp++ = L'\n';
++rp;
break;
case 't':
*wp++ = '\t';
case L't':
*wp++ = L'\t';
++rp;
break;
case 'v':
*wp++ = '\v';
case L'v':
*wp++ = L'\v';
++rp;
break;
case 'b':
*wp++ = '\b';
case L'b':
*wp++ = L'\b';
++rp;
break;
case 'r':
*wp++ = '\r';
case L'r':
*wp++ = L'\r';
++rp;
break;
case 'f':
*wp++ = '\f';
case L'f':
*wp++ = L'\f';
++rp;
break;
case '\\':
*wp++ = '\\';
case L'\\':
*wp++ = L'\\';
++rp;
break;
case '0' ... '7':
case L'0' ... L'7':
{
int number = *rp++ - '0';
while (number <= (255 / 8) && *rp >= '0' && *rp <= '7')
int number;
char cbuf[2];
char *cbufptr;
size_t cbufin;
wchar_t wcbuf[2];
char *wcbufptr;
size_t wcbufin;
number = *rp++ - L'0';
while (number <= (255 / 8) && *rp >= L'0' && *rp <= L'7')
{
number *= 8;
number += *rp++ - '0';
number += *rp++ - L'0';
}
*wp++ = (char) number;
cbuf[0] = (char) number;
cbuf[1] = '\0';
cbufptr = cbuf;
cbufin = 2;
wcbufptr = (char *) wcbuf;
wcbufin = sizeof (wcbuf);
/* Flush the state. */
iconv (cd, NULL, NULL, NULL, NULL);
iconv (cd, &cbufptr, &cbufin, &wcbufptr, &wcbufin);
if (cbufptr != &cbuf[2] || (wchar_t *) wcbufptr != &wcbuf[2])
error_at_line (0, 0, fname, line,
gettext ("invalid escape sequence"));
else
*wp++ = wcbuf[0];
}
break;
default:
@ -974,10 +1157,10 @@ normalize_line (const char *fname, size_t line, char *string, char quote_char)
/* If we saw a quote character at the beginning we expect another
one at the end. */
if (is_quoted && *rp != quote_char)
error (0, 0, fname, line, gettext ("unterminated message"));
error_at_line (0, 0, fname, line, gettext ("unterminated message"));
/* Terminate string. */
*wp = '\0';
*wp = L'\0';
return;
}
@ -1069,3 +1252,30 @@ read_old (struct catalog *catalog, const char *file_name)
}
}
}
static int
open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp)
{
/* If the input file does not specify the codeset use the locale's. */
if (codeset == NULL)
{
setlocale (LC_ALL, "");
codeset = nl_langinfo (CODESET);
setlocale (LC_ALL, "C");
}
/* Get the conversion modules. */
*cd_towcp = iconv_open ("WCHAR_T", codeset);
*cd_tombp = iconv_open (codeset, "WCHAR_T");
if (*cd_towcp == (iconv_t) -1 || *cd_tombp == (iconv_t) -1)
{
error (0, 0, gettext ("conversion modules not available"));
if (*cd_towcp != (iconv_t) -1)
iconv_close (*cd_towcp);
return 1;
}
return 0;
}