mirror of
https://sourceware.org/git/glibc.git
synced 2025-01-04 16:50:13 +00:00
534 lines
13 KiB
C
534 lines
13 KiB
C
|
/* Copyright (C) 1995 Free Software Foundation, Inc.
|
||
|
|
||
|
The GNU C Library is free software; you can redistribute it and/or
|
||
|
modify it under the terms of the GNU Library General Public License as
|
||
|
published by the Free Software Foundation; either version 2 of the
|
||
|
License, or (at your option) any later version.
|
||
|
|
||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
Library General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU Library General Public
|
||
|
License along with the GNU C Library; see the file COPYING.LIB. If
|
||
|
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
|
||
|
Cambridge, MA 02139, USA. */
|
||
|
|
||
|
#include <ctype.h>
|
||
|
#include <langinfo.h>
|
||
|
#include <libintl.h>
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <unistd.h>
|
||
|
|
||
|
#include "localedef.h"
|
||
|
#include "token.h"
|
||
|
|
||
|
|
||
|
/* Include the hashing table for the keywords. */
|
||
|
const struct locale_keyword* in_word_set (register const char *str,
|
||
|
register int len);
|
||
|
#include "keyword.h"
|
||
|
|
||
|
|
||
|
/* Contains the status of reading the locale definition file. */
|
||
|
struct locfile_data locfile_data;
|
||
|
|
||
|
/* This flag is used while reading collation input.  This is the only place
|
||
|
where element names beside the ones defined in the character map are
|
||
|
allowed. There we must not give error messages. */
|
||
|
int reject_new_char = 1;
|
||
|
|
||
|
/* Prototypes for local functions. */
|
||
|
static int get_char (void);
|
||
|
|
||
|
|
||
|
#define LD locfile_data
|
||
|
|
||
|
/* Opens the locale definition file and initializes the status data structure
|
||
|
for following calls of `locfile_lex'. */
|
||
|
void
|
||
|
locfile_open (const char *fname)
|
||
|
{
|
||
|
if (fname == NULL)
|
||
|
/* We read from stdin. */
|
||
|
LD.filename = "<stdin>";
|
||
|
else
|
||
|
{
|
||
|
if (freopen (fname, "r", stdin) == NULL)
|
||
|
error (4, 0, gettext ("input file `%s' not found"), fname);
|
||
|
LD.filename = fname;
|
||
|
}
|
||
|
|
||
|
/* Set default values. */
|
||
|
LD.escape_char = '\\';
|
||
|
LD.comment_char = '#';
|
||
|
|
||
|
LD.bufsize = sysconf (_SC_LINE_MAX);
|
||
|
LD.buf = (char *) xmalloc (LD.bufsize);
|
||
|
LD.strbuf = (char *) xmalloc (LD.bufsize);
|
||
|
|
||
|
LD.buf_ptr = LD.returned_tokens = LD.line_no = 0;
|
||
|
|
||
|
/* Now sign that we want immediately read a line. */
|
||
|
LD.continue_line = 1;
|
||
|
LD.buf[LD.buf_ptr] = '\0';
|
||
|
}
|
||
|
|
||
|
|
||
|
int
|
||
|
xlocfile_lex (char **token, int *token_len)
|
||
|
{
|
||
|
int retval = locfile_lex (token, token_len);
|
||
|
|
||
|
if (retval == 0)
|
||
|
/* I.e. end of file. */
|
||
|
error (4, 0, gettext ("%s: unexpected end of file in locale defintion "
|
||
|
"file"), locfile_data.filename);
|
||
|
|
||
|
return retval;
|
||
|
}
|
||
|
|
||
|
int
|
||
|
locfile_lex (char **token, int *token_len)
|
||
|
{
|
||
|
int start_again;
|
||
|
int retval = 0;
|
||
|
|
||
|
do
|
||
|
{
|
||
|
int start_ptr;
|
||
|
|
||
|
start_again = 0;
|
||
|
|
||
|
/* Read the next line. Skip over empty lines and comments. */
|
||
|
if ((LD.buf[LD.buf_ptr] == '\0' && LD.continue_line != 0)
|
||
|
|| LD.buf_ptr >= LD.bufsize
|
||
|
|| (posix_conformance == 0 && LD.buf[LD.buf_ptr] == LD.comment_char))
|
||
|
do
|
||
|
{
|
||
|
size_t linelen;
|
||
|
|
||
|
LD.buf_ptr = 0;
|
||
|
|
||
|
if (fgets (LD.buf, LD.bufsize, stdin) == NULL)
|
||
|
{
|
||
|
/* This makes subsequent calls also return EOF. */
|
||
|
LD.buf[0] = '\0';
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/* Increment line number counter. */
|
||
|
++LD.line_no;
|
||
|
|
||
|
/* We now have to look whether this line is continued and
|
||
|
whether it at all fits into our buffer. */
|
||
|
linelen = strlen (LD.buf);
|
||
|
|
||
|
if (linelen == LD.bufsize - 1)
|
||
|
/* The did not fit into the buffer. */
|
||
|
error (2, 0, gettext ("%s:%Zd: line too long; use "
|
||
|
"`getconf LINE_MAX' to get the maximum "
|
||
|
"line length"), LD.filename, LD.line_no);
|
||
|
|
||
|
/* Remove '\n' at end of line. */
|
||
|
if (LD.buf[linelen - 1] == '\n')
|
||
|
LD.buf[--linelen] = '\0';
|
||
|
|
||
|
if (linelen > 0 && LD.buf[linelen - 1] == LD.escape_char)
|
||
|
{
|
||
|
LD.buf[--linelen] = '\0';
|
||
|
LD.continue_line = 1;
|
||
|
}
|
||
|
else
|
||
|
LD.continue_line = 0;
|
||
|
|
||
|
while (isspace (LD.buf[LD.buf_ptr]))
|
||
|
++LD.buf_ptr;
|
||
|
|
||
|
/* We are not so restrictive and allow white spaces before
|
||
|
a comment. */
|
||
|
if (posix_conformance == 0
|
||
|
&& LD.buf[LD.buf_ptr] == LD.comment_char
|
||
|
&& LD.buf_ptr != 0)
|
||
|
error (0, 0, gettext ("%s:%Zd: comment does not start in "
|
||
|
"column 1"), LD.filename, LD.line_no);
|
||
|
}
|
||
|
while (LD.buf[LD.buf_ptr] == '\0'
|
||
|
|| LD.buf[LD.buf_ptr] == LD.comment_char);
|
||
|
|
||
|
|
||
|
/* Get information for return values. */
|
||
|
*token = LD.buf + LD.buf_ptr;
|
||
|
start_ptr = LD.buf_ptr;
|
||
|
|
||
|
/* If no further character is in the line this is the end of a logical
|
||
|
line. This information is needed in the parser. */
|
||
|
if (LD.buf[LD.buf_ptr] == '\0')
|
||
|
{
|
||
|
LD.buf_ptr = LD.bufsize;
|
||
|
retval = TOK_ENDOFLINE;
|
||
|
}
|
||
|
else if (isalpha (LD.buf[LD.buf_ptr]))
|
||
|
/* The token is an identifier. The POSIX standard does not say
|
||
|
what characters might be contained but offical POSIX locale
|
||
|
definition files contain beside alnum characters '_', '-' and
|
||
|
'+'. */
|
||
|
{
|
||
|
const struct locale_keyword *kw;
|
||
|
|
||
|
do
|
||
|
++LD.buf_ptr;
|
||
|
while (isalnum (LD.buf[LD.buf_ptr]) || LD.buf[LD.buf_ptr] == '_'
|
||
|
|| LD.buf[LD.buf_ptr] == '-' || LD.buf[LD.buf_ptr] == '+');
|
||
|
|
||
|
/* Look in table of keywords. */
|
||
|
kw = in_word_set (*token, LD.buf_ptr - start_ptr);
|
||
|
if (kw == NULL)
|
||
|
retval = TOK_IDENT;
|
||
|
else
|
||
|
{
|
||
|
if (kw->token_id == TOK_ESCAPE_CHAR
|
||
|
|| kw->token_id == TOK_COMMENT_CHAR)
|
||
|
/* `escape_char' and `comment_char' are keywords for the
|
||
|
lexer. Do not give them to the parser. */
|
||
|
{
|
||
|
start_again = 1;
|
||
|
|
||
|
if (!isspace (LD.buf[LD.buf_ptr])
|
||
|
|| (posix_conformance && LD.returned_tokens > 0))
|
||
|
error (0, 0, gettext ("%s:%Zd: syntax error in locale "
|
||
|
"definition file"),
|
||
|
LD.filename, LD.line_no);
|
||
|
|
||
|
do
|
||
|
++LD.buf_ptr;
|
||
|
while (isspace (LD.buf[LD.buf_ptr]));
|
||
|
|
||
|
kw->token_id == TOK_ESCAPE_CHAR
|
||
|
? LD.escape_char
|
||
|
: LD.comment_char = LD.buf[LD.buf_ptr++];
|
||
|
|
||
|
ignore_to_eol (0, posix_conformance);
|
||
|
}
|
||
|
else
|
||
|
/* It is one of the normal keywords. */
|
||
|
retval = kw->token_id;
|
||
|
}
|
||
|
|
||
|
*token_len = LD.buf_ptr - start_ptr;
|
||
|
}
|
||
|
else if (LD.buf[LD.buf_ptr] == '"')
|
||
|
/* Read a string. All symbolic character descriptions are expanded.
|
||
|
This has to be done in a local buffer because a simple symbolic
|
||
|
character like <A> may expand to upto 6 bytes. */
|
||
|
{
|
||
|
char *last = LD.strbuf;
|
||
|
|
||
|
++LD.buf_ptr;
|
||
|
while (LD.buf[LD.buf_ptr] != '"')
|
||
|
{
|
||
|
int pre = LD.buf_ptr;
|
||
|
int char_val = get_char (); /* token, token_len); */
|
||
|
|
||
|
if (char_val == 0)
|
||
|
{
|
||
|
error (4, 0, gettext ("%s:%Zd: unterminated string at end "
|
||
|
"of line"), LD.filename, LD.line_no);
|
||
|
/* NOTREACHED */
|
||
|
}
|
||
|
|
||
|
if (char_val > 0)
|
||
|
/* Unknown characters are simply not stored. */
|
||
|
last += char_to_utf (last, char_val);
|
||
|
else
|
||
|
{
|
||
|
char tmp[LD.buf_ptr - pre + 1];
|
||
|
memcpy (tmp, &LD.buf[pre], LD.buf_ptr - pre);
|
||
|
tmp[LD.buf_ptr - pre] = '\0';
|
||
|
error (0, 0, gettext ("%s:%Zd: character `%s' not defined"),
|
||
|
LD.filename, LD.line_no, tmp);
|
||
|
}
|
||
|
}
|
||
|
if (LD.buf[LD.buf_ptr] != '\0')
|
||
|
++LD.buf_ptr;
|
||
|
|
||
|
*last = '\0';
|
||
|
*token = LD.strbuf;
|
||
|
*token_len = last - LD.strbuf;
|
||
|
retval = TOK_STRING;
|
||
|
}
|
||
|
else if (LD.buf[LD.buf_ptr] == '.' && LD.buf[LD.buf_ptr + 1] == '.'
|
||
|
&& LD.buf[LD.buf_ptr + 2] == '.')
|
||
|
{
|
||
|
LD.buf_ptr += 3;
|
||
|
retval = TOK_ELLIPSIS;
|
||
|
}
|
||
|
else if (LD.buf[LD.buf_ptr] == LD.escape_char)
|
||
|
{
|
||
|
char *endp;
|
||
|
|
||
|
++LD.buf_ptr;
|
||
|
switch (LD.buf[LD.buf_ptr])
|
||
|
{
|
||
|
case 'x':
|
||
|
if (isdigit (LD.buf[++LD.buf_ptr]))
|
||
|
{
|
||
|
retval = strtol (&LD.buf[LD.buf_ptr], &endp, 16);
|
||
|
if (endp - (LD.buf + LD.buf_ptr) < 2 || retval > 255)
|
||
|
retval = 'x';
|
||
|
else
|
||
|
LD.buf_ptr = endp - LD.buf;
|
||
|
}
|
||
|
else
|
||
|
retval = 'x';
|
||
|
break;
|
||
|
case 'd':
|
||
|
if (isdigit (LD.buf[++LD.buf_ptr]))
|
||
|
{
|
||
|
retval = strtol (&LD.buf[LD.buf_ptr], &endp, 10);
|
||
|
if (endp - (LD.buf + LD.buf_ptr) < 2 || retval > 255)
|
||
|
retval = 'd';
|
||
|
else
|
||
|
LD.buf_ptr = endp - LD.buf;
|
||
|
}
|
||
|
else
|
||
|
retval = 'd';
|
||
|
break;
|
||
|
case '0'...'9':
|
||
|
retval = strtol (&LD.buf[LD.buf_ptr], &endp, 8);
|
||
|
if (endp - (LD.buf + LD.buf_ptr) < 2 || retval > 255)
|
||
|
retval = LD.buf[LD.buf_ptr++];
|
||
|
else
|
||
|
LD.buf_ptr = endp - LD.buf;
|
||
|
break;
|
||
|
case 'a':
|
||
|
retval = '\a';
|
||
|
++LD.buf_ptr;
|
||
|
break;
|
||
|
case 'b':
|
||
|
retval = '\b';
|
||
|
++LD.buf_ptr;
|
||
|
break;
|
||
|
case 'f':
|
||
|
retval = '\f';
|
||
|
++LD.buf_ptr;
|
||
|
break;
|
||
|
case 'n':
|
||
|
retval = '\n';
|
||
|
++LD.buf_ptr;
|
||
|
break;
|
||
|
case 'r':
|
||
|
retval = '\r';
|
||
|
++LD.buf_ptr;
|
||
|
break;
|
||
|
case 't':
|
||
|
retval = '\t';
|
||
|
++LD.buf_ptr;
|
||
|
break;
|
||
|
case 'v':
|
||
|
retval = '\v';
|
||
|
++LD.buf_ptr;
|
||
|
break;
|
||
|
default:
|
||
|
retval = LD.buf[LD.buf_ptr++];
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
else if (isdigit (LD.buf[LD.buf_ptr]))
|
||
|
{
|
||
|
char *endp;
|
||
|
|
||
|
*token_len = strtol (&LD.buf[LD.buf_ptr], &endp, 10);
|
||
|
LD.buf_ptr = endp - LD.buf;
|
||
|
retval = TOK_NUMBER;
|
||
|
}
|
||
|
else if (LD.buf[LD.buf_ptr] == '-' && LD.buf[LD.buf_ptr + 1] == '1')
|
||
|
{
|
||
|
LD.buf_ptr += 2;
|
||
|
retval = TOK_MINUS1;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
int ch = get_char (); /* token, token_len); */
|
||
|
if (ch != -1)
|
||
|
{
|
||
|
*token_len = ch;
|
||
|
retval = TOK_CHAR;
|
||
|
}
|
||
|
else
|
||
|
retval = TOK_ILL_CHAR;
|
||
|
}
|
||
|
|
||
|
/* Ignore white space. */
|
||
|
while (isspace (LD.buf[LD.buf_ptr]))
|
||
|
++LD.buf_ptr;
|
||
|
}
|
||
|
while (start_again != 0);
|
||
|
|
||
|
++LD.returned_tokens;
|
||
|
return retval;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Code a character with UTF-8 if the character map has multi-byte
|
||
|
characters. */
|
||
|
int
|
||
|
char_to_utf (char *buf, int char_val)
|
||
|
{
|
||
|
if (charmap_data.mb_cur_max == 1)
|
||
|
{
|
||
|
*buf++ = char_val;
|
||
|
return 1;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/* The number of bits coded in each character. */
|
||
|
#define CBPC 6
|
||
|
static struct coding_tab
|
||
|
{
|
||
|
int mask;
|
||
|
int val;
|
||
|
}
|
||
|
tab[] =
|
||
|
{
|
||
|
{ 0x7f, 0x00 },
|
||
|
{ 0x7ff, 0xc0 },
|
||
|
{ 0xffff, 0xe0 },
|
||
|
{ 0x1fffff, 0xf0 },
|
||
|
{ 0x3ffffff, 0xf8 },
|
||
|
{ 0x7fffffff, 0xfc },
|
||
|
{ 0, }
|
||
|
};
|
||
|
struct coding_tab *t;
|
||
|
int c;
|
||
|
int cnt = 1;
|
||
|
|
||
|
for (t = tab; char_val > t->mask; ++t, ++cnt)
|
||
|
;
|
||
|
|
||
|
c = cnt;
|
||
|
|
||
|
buf += cnt;
|
||
|
while (c > 1)
|
||
|
{
|
||
|
*--buf = 0x80 | (char_val & ((1 << CBPC) - 1));
|
||
|
char_val >>= CBPC;
|
||
|
--c;
|
||
|
}
|
||
|
|
||
|
*--buf = t->val | char_val;
|
||
|
|
||
|
return cnt;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Ignore rest of line upto ENDOFLINE token, starting with given token.
|
||
|
If WARN_FLAG is set warn about any token but ENDOFLINE. */
|
||
|
void
|
||
|
ignore_to_eol (int token, int warn_flag)
|
||
|
{
|
||
|
if (token == TOK_ENDOFLINE)
|
||
|
return;
|
||
|
|
||
|
if (LD.buf[LD.buf_ptr] != '\0' && warn_flag)
|
||
|
error (0, 0, gettext ("%s:%Zd: trailing garbage at end of line"),
|
||
|
locfile_data.filename, locfile_data.line_no);
|
||
|
|
||
|
while (LD.continue_line)
|
||
|
{
|
||
|
LD.continue_line = 0;
|
||
|
|
||
|
/* Increment line number counter. */
|
||
|
++LD.line_no;
|
||
|
|
||
|
if (fgets (LD.buf, LD.bufsize, stdin) != NULL)
|
||
|
{
|
||
|
/* We now have to look whether this line is continued and
|
||
|
whether it at all fits into our buffer. */
|
||
|
int linelen = strlen (LD.buf);
|
||
|
|
||
|
if (linelen == LD.bufsize - 1)
|
||
|
/* The did not fit into the buffer. */
|
||
|
error (2, 0, gettext ("%s:%Zd: line too long; use `getconf "
|
||
|
"LINE_MAX' to get the current maximum "
|
||
|
"line length"), LD.filename, LD.line_no);
|
||
|
|
||
|
/* Remove '\n' at end of line. */
|
||
|
if (LD.buf[linelen - 1] == '\n')
|
||
|
--linelen;
|
||
|
|
||
|
if (LD.buf[linelen - 1] == LD.escape_char)
|
||
|
LD.continue_line = 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* This causes to begin the next line. */
|
||
|
LD.buf_ptr = LD.bufsize;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Return the value of the character at the beginning of the input buffer.
|
||
|
Symbolic character constants are expanded. */
|
||
|
static int
|
||
|
get_char (void)
|
||
|
{
|
||
|
if (LD.buf[LD.buf_ptr] == '<')
|
||
|
/* This is a symbolic character name. */
|
||
|
{
|
||
|
int char_val;
|
||
|
char *startp = LD.buf + (++LD.buf_ptr);
|
||
|
char *endp = startp;
|
||
|
|
||
|
while (LD.buf[LD.buf_ptr] != '>' && isprint (LD.buf[LD.buf_ptr]))
|
||
|
{
|
||
|
if (LD.buf[LD.buf_ptr] == '\0'
|
||
|
|| (LD.buf[LD.buf_ptr] == LD.escape_char
|
||
|
&& LD.buf[++LD.buf_ptr] == '\0'))
|
||
|
break;
|
||
|
|
||
|
*endp++ = LD.buf[LD.buf_ptr++];
|
||
|
}
|
||
|
|
||
|
if (LD.buf[LD.buf_ptr] != '>' && LD.buf[LD.buf_ptr] == '\0')
|
||
|
{
|
||
|
error (0, 0, gettext ("%s:%Zd: end of line in character symbol"),
|
||
|
LD.filename, LD.line_no);
|
||
|
|
||
|
if (startp == endp)
|
||
|
return -1;
|
||
|
}
|
||
|
else
|
||
|
++LD.buf_ptr;
|
||
|
|
||
|
char_val = find_char (startp, endp - startp);
|
||
|
if (char_val == -1 && verbose != 0 && reject_new_char != 0)
|
||
|
{
|
||
|
/* Locale defintions are often given very general. Missing
|
||
|
characters are only reported when explicitely requested. */
|
||
|
char tmp[endp - startp + 3];
|
||
|
|
||
|
tmp[0] = '<';
|
||
|
memcpy (tmp + 1, startp, endp - startp);
|
||
|
tmp[endp - startp + 1] = '>';
|
||
|
tmp[endp - startp + 2] = '\0';
|
||
|
|
||
|
error (0, 0, gettext ("%s:%Zd: character `%s' not defined"),
|
||
|
LD.filename, LD.line_no, tmp);
|
||
|
}
|
||
|
|
||
|
return char_val;
|
||
|
}
|
||
|
else
|
||
|
return (int) LD.buf[LD.buf_ptr++];
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Local Variables:
|
||
|
* mode:c
|
||
|
* c-basic-offset:2
|
||
|
* End:
|
||
|
*/
|