mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-18 11:00:07 +00:00
525 lines
13 KiB
C
525 lines
13 KiB
C
|
/* Copyright (C) 1995 Free Software Foundation, Inc.
|
||
|
|
||
|
The GNU C Library is free software; you can redistribute it and/or
|
||
|
modify it under the terms of the GNU Library General Public License as
|
||
|
published by the Free Software Foundation; either version 2 of the
|
||
|
License, or (at your option) any later version.
|
||
|
|
||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
Library General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU Library General Public
|
||
|
License along with the GNU C Library; see the file COPYING.LIB. If
|
||
|
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
|
||
|
Cambridge, MA 02139, USA. */
|
||
|
|
||
|
#include <ctype.h>
|
||
|
#include <errno.h>
|
||
|
#include <libintl.h>
|
||
|
#include <limits.h>
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
#include <unistd.h>
|
||
|
|
||
|
#include "localedef.h"
|
||
|
#include "hash.h"
|
||
|
|
||
|
/* Data structure for representing charmap database. */
|
||
|
struct charmap charmap_data;
|
||
|
|
||
|
/* Line number in charmap file. */
|
||
|
static unsigned int line_no;
|
||
|
|
||
|
/* Prototypes for local functions. */
|
||
|
static void read_prolog (FILE *infile);
|
||
|
static unsigned long read_body (FILE *infile);
|
||
|
|
||
|
|
||
|
/* Read complete table of symbolic names for character set from file. If
|
||
|
this file does not exist or is not readable a default file is tried.
|
||
|
If this also is not readable no character map is defined. */
|
||
|
void
|
||
|
charmap_read (const char *filename)
|
||
|
{
|
||
|
unsigned long max_char;
|
||
|
long path_max = pathconf (".", _PC_PATH_MAX);
|
||
|
char buf[path_max];
|
||
|
FILE *infile = NULL;
|
||
|
|
||
|
/* Initialize charmap data. */
|
||
|
charmap_data.codeset_name = NULL;
|
||
|
charmap_data.mb_cur_max = -1;
|
||
|
charmap_data.mb_cur_min = -1;
|
||
|
charmap_data.escape_char = '\\';
|
||
|
charmap_data.comment_char = '#';
|
||
|
|
||
|
if (filename != NULL)
|
||
|
{
|
||
|
strcpy (buf, filename);
|
||
|
infile = fopen (filename, "r");
|
||
|
if (infile == NULL && filename[0] != '/')
|
||
|
{
|
||
|
snprintf (buf, path_max, "%s/%s", CHARMAP_PATH, filename);
|
||
|
infile = fopen (buf, "r");
|
||
|
}
|
||
|
}
|
||
|
if (infile == NULL)
|
||
|
{
|
||
|
if (filename != NULL)
|
||
|
error (0, errno, gettext ("input file `%s' not found"), filename);
|
||
|
|
||
|
snprintf (buf, path_max, "%s/%s", CHARMAP_PATH, DEFAULT_CHARMAP);
|
||
|
infile = fopen (buf, "r");
|
||
|
|
||
|
if (infile == NULL)
|
||
|
error (4, errno, gettext ("input file `%s' not found"), filename);
|
||
|
}
|
||
|
|
||
|
charmap_data.filename = buf;
|
||
|
init_hash (&charmap_data.table, 500);
|
||
|
line_no = 0;
|
||
|
|
||
|
/* Read the prolog of the charmap file. */
|
||
|
read_prolog (infile);
|
||
|
|
||
|
/* Last works on the charmap tables global data. */
|
||
|
if (charmap_data.mb_cur_max == -1)
|
||
|
charmap_data.mb_cur_max = 1;
|
||
|
if (charmap_data.mb_cur_min == -1)
|
||
|
charmap_data.mb_cur_min = charmap_data.mb_cur_max;
|
||
|
|
||
|
if ((size_t) charmap_data.mb_cur_max > sizeof (long))
|
||
|
{
|
||
|
error (2, 0, gettext ("program limitation: for now only upto %Zu "
|
||
|
"bytes per character are allowed"), sizeof (long));
|
||
|
}
|
||
|
|
||
|
/* Now process all entries. */
|
||
|
max_char = read_body (infile);
|
||
|
|
||
|
/* We don't need the file anymore. */
|
||
|
fclose (infile);
|
||
|
|
||
|
|
||
|
/* Determine the optimal table size when using the simple modulo hashing
|
||
|
function. */
|
||
|
if (max_char >= 256)
|
||
|
{
|
||
|
int size;
|
||
|
/* Current best values, initialized to some never reached high value. */
|
||
|
int best_count = 10000;
|
||
|
int best_size = 10000;
|
||
|
int best_product = best_count * best_size;
|
||
|
|
||
|
/* Give warning. */
|
||
|
error (-1, 0, gettext ("computing character table size: this may take "
|
||
|
"a while"));
|
||
|
|
||
|
for (size = 256; size <= best_product; ++size)
|
||
|
{
|
||
|
/* Array with slot counters. */
|
||
|
int cnt[size];
|
||
|
/* Current character. */
|
||
|
int ch;
|
||
|
/* Maximal number of characters in any slot. */
|
||
|
int maxcnt = 0;
|
||
|
/* Product of current size and maximal count. */
|
||
|
int product = 0;
|
||
|
/* Iteration pointer through hashing table. */
|
||
|
char *ptr = NULL;
|
||
|
|
||
|
/* Initializes counters to zero. */
|
||
|
memset(cnt, 0, size * sizeof (int));
|
||
|
|
||
|
/* Iterate through whole hashing table. */
|
||
|
while (product < best_product
|
||
|
&& iterate_table (&charmap_data.table, (void **) &ptr,
|
||
|
(void **) &ch))
|
||
|
{
|
||
|
/* Increment slot counter. */
|
||
|
++cnt[ch % size];
|
||
|
/* Test for current maximum. */
|
||
|
if (cnt[ch % size] > maxcnt)
|
||
|
{
|
||
|
maxcnt = cnt[ch % size];
|
||
|
product = maxcnt * size;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (product < best_product)
|
||
|
{
|
||
|
best_count = maxcnt;
|
||
|
best_size = size;
|
||
|
best_product = best_count * best_size;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
charmap_data.hash_size = best_size;
|
||
|
charmap_data.hash_layers = best_count;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
charmap_data.hash_size = 256;
|
||
|
charmap_data.hash_layers = 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
#define SYNTAX_ERROR \
|
||
|
do { error (0, 0, gettext ("%s:%u: syntax error in charmap file"), \
|
||
|
charmap_data.filename, line_no); \
|
||
|
goto end_of_loop; } while (0)
|
||
|
|
||
|
/* Read the prolog of the charmap file until the line containing `CHARMAP'.
|
||
|
All possible entries are processed. */
|
||
|
static void
|
||
|
read_prolog (FILE *infile)
|
||
|
{
|
||
|
size_t bufsize = sysconf (_SC_LINE_MAX);
|
||
|
char buf[bufsize];
|
||
|
|
||
|
while (1)
|
||
|
{
|
||
|
char *cp = buf;
|
||
|
char len;
|
||
|
|
||
|
/* Read the next line. */
|
||
|
fgets (buf, bufsize, infile);
|
||
|
len = strlen (buf);
|
||
|
|
||
|
/* On EOF simply return. */
|
||
|
if (len == 0 || buf[len - 1] != '\n')
|
||
|
error (4, 0, gettext ("%s: unexpected end of file in charmap"),
|
||
|
charmap_data.filename);
|
||
|
|
||
|
/* This is the next line. */
|
||
|
++line_no;
|
||
|
|
||
|
/* Comments and empty lines are ignored. */
|
||
|
if (len == 1 || buf[0] == charmap_data.comment_char)
|
||
|
continue;
|
||
|
|
||
|
buf[len - 1] = '\0';
|
||
|
|
||
|
/* Throw away leading white spaces. This is not defined in POSIX.2
|
||
|
so don't do it if conformance is requested. */
|
||
|
if (!posix_conformance)
|
||
|
while (isspace (*cp))
|
||
|
++cp;
|
||
|
|
||
|
/* If `CHARMAP' is read the prolog is over. */
|
||
|
if (strncmp (cp, "CHARMAP", 7) == 0
|
||
|
&& (!posix_conformance || cp[7] == '\0'))
|
||
|
return;
|
||
|
|
||
|
/* Now it can be only one of special symbols defining the charmap
|
||
|
parameters. All are beginning with '<'. */
|
||
|
if (*cp != '<')
|
||
|
SYNTAX_ERROR;
|
||
|
|
||
|
++cp;
|
||
|
if (strncmp (cp, "code_set_name>", 14) == 0)
|
||
|
{
|
||
|
char *startp;
|
||
|
|
||
|
#define cp_to_arg(no,pred) \
|
||
|
cp += no; \
|
||
|
while (isspace (*cp)) \
|
||
|
++cp; \
|
||
|
if (*cp == '\0' || !pred (*cp)) \
|
||
|
SYNTAX_ERROR;
|
||
|
|
||
|
cp_to_arg (14,isgraph)
|
||
|
|
||
|
if (charmap_data.codeset_name != NULL)
|
||
|
{
|
||
|
error (0, 0, gettext ("%s:%u: duplicate code set name "
|
||
|
"specification"),
|
||
|
charmap_data.filename, line_no);
|
||
|
free (charmap_data.codeset_name);
|
||
|
}
|
||
|
|
||
|
startp = cp;
|
||
|
while (*cp != '\0' && isgraph (*cp) && !isspace (*cp))
|
||
|
++cp;
|
||
|
|
||
|
charmap_data.codeset_name = (char *) xmalloc (cp - startp + 1);
|
||
|
strncpy (startp, startp, cp - startp);
|
||
|
}
|
||
|
else if (strncmp (cp, "mb_cur_max>", 11) == 0)
|
||
|
{
|
||
|
int new_val;
|
||
|
cp_to_arg (11,isdigit)
|
||
|
|
||
|
if (charmap_data.mb_cur_max != -1)
|
||
|
error (0, 0,
|
||
|
gettext ("%s:%u: duplicate definition of mb_cur_max"),
|
||
|
charmap_data.filename, line_no);
|
||
|
|
||
|
new_val = (int) strtol (cp, &cp, posix_conformance ? 10 : 0);
|
||
|
if (new_val < 1)
|
||
|
error (0, 0, gettext ("%s:%u: illegal value for mb_cur_max: %d"),
|
||
|
charmap_data.filename, line_no, new_val);
|
||
|
else
|
||
|
charmap_data.mb_cur_max = new_val;
|
||
|
}
|
||
|
else if (strncmp (cp, "mb_cur_min>", 11) == 0)
|
||
|
{
|
||
|
int new_val;
|
||
|
cp_to_arg (11,isdigit)
|
||
|
|
||
|
if (charmap_data.mb_cur_max != -1)
|
||
|
error (0, 0,
|
||
|
gettext ("%s:%u: duplicate definition of mb_cur_min"),
|
||
|
charmap_data.filename, line_no);
|
||
|
|
||
|
new_val = (int) strtol (cp, &cp, posix_conformance ? 10 : 0);
|
||
|
if (new_val < 1)
|
||
|
error (0, 0, gettext ("%s:%u: illegal value for mb_cur_min: %d"),
|
||
|
charmap_data.filename, line_no, new_val);
|
||
|
else
|
||
|
charmap_data.mb_cur_min = new_val;
|
||
|
}
|
||
|
else if (strncmp (cp, "escape_char>", 12) == 0)
|
||
|
{
|
||
|
cp_to_arg (12, isgraph)
|
||
|
charmap_data.escape_char = *cp;
|
||
|
}
|
||
|
else if (strncmp (cp, "comment_char>", 13) == 0)
|
||
|
{
|
||
|
cp_to_arg (13, isgraph)
|
||
|
charmap_data.comment_char = *cp;
|
||
|
}
|
||
|
else
|
||
|
SYNTAX_ERROR;
|
||
|
end_of_loop:
|
||
|
}
|
||
|
}
|
||
|
#undef cp_to_arg
|
||
|
|
||
|
|
||
|
static unsigned long
|
||
|
read_body (FILE *infile)
|
||
|
{
|
||
|
unsigned long max_char = 0;
|
||
|
size_t bufsize = sysconf (_SC_LINE_MAX);
|
||
|
char buf[bufsize];
|
||
|
char name_str[bufsize / 2];
|
||
|
char code_str[bufsize / 2];
|
||
|
|
||
|
while (1)
|
||
|
{
|
||
|
char *cp = buf;
|
||
|
size_t len;
|
||
|
|
||
|
/* Read the next line. */
|
||
|
fgets (buf, bufsize, infile);
|
||
|
len = strlen (buf);
|
||
|
|
||
|
/* On EOF simply return. */
|
||
|
if (len == 0)
|
||
|
error (0, 0, gettext ("%s: `END CHARMAP' is missing"),
|
||
|
charmap_data.filename);
|
||
|
|
||
|
/* This is the next line. */
|
||
|
++line_no;
|
||
|
|
||
|
if (len == bufsize - 1)
|
||
|
{
|
||
|
error (0, 0, gettext ("%s:%u: line too long; use `getconf "
|
||
|
"LINE_MAX' to get the current maximum line"
|
||
|
"length"), charmap_data.filename, line_no);
|
||
|
do
|
||
|
{
|
||
|
fgets (buf, bufsize, infile);
|
||
|
len = strlen (buf);
|
||
|
}
|
||
|
while (len == bufsize - 1);
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/* Comments and empty lines are ignored. */
|
||
|
if (len == 1 || buf[0] == charmap_data.comment_char)
|
||
|
continue;
|
||
|
|
||
|
buf[len - 1] = '\0';
|
||
|
|
||
|
/* Throw away leading white spaces. This is not defined in POSIX.2
|
||
|
so don't do it if conformance is requested. */
|
||
|
if (!posix_conformance)
|
||
|
while (isspace (*cp))
|
||
|
++cp;
|
||
|
|
||
|
if (*cp == '<')
|
||
|
{
|
||
|
char *end1p, *end2p, *start2p;
|
||
|
size_t cnt = 0;
|
||
|
unsigned long char_value = 0;
|
||
|
|
||
|
if (sscanf (cp + 1, "%s %s", name_str, code_str) != 2)
|
||
|
SYNTAX_ERROR;
|
||
|
|
||
|
end1p = cp = name_str;
|
||
|
while (*cp != '\0' && *cp != '>')
|
||
|
{
|
||
|
if (*cp == charmap_data.escape_char)
|
||
|
if (*++cp == '\0')
|
||
|
SYNTAX_ERROR;
|
||
|
*end1p++ = *cp++;
|
||
|
}
|
||
|
if (*cp == '\0')
|
||
|
/* No final '>'. Make error condition. */
|
||
|
end1p = name_str;
|
||
|
else
|
||
|
++cp;
|
||
|
|
||
|
*end1p = '\0';
|
||
|
|
||
|
if (*cp == '.' && *++cp == '.' && *++cp == '.' && *++cp == '<')
|
||
|
{
|
||
|
/* This might be the alternate form. */
|
||
|
start2p = end2p = ++cp;
|
||
|
while (*cp != '\0' && *cp != '>')
|
||
|
{
|
||
|
if (*cp == charmap_data.escape_char)
|
||
|
if (*++cp == '\0')
|
||
|
SYNTAX_ERROR;
|
||
|
*end2p = *cp++;
|
||
|
}
|
||
|
if (*cp == '\0')
|
||
|
/* NO final '>'. Make error condition. */
|
||
|
end2p = start2p;
|
||
|
else
|
||
|
++cp;
|
||
|
}
|
||
|
else
|
||
|
start2p = end2p = NULL;
|
||
|
|
||
|
|
||
|
if (end1p == name_str || (start2p != NULL && start2p != end2p)
|
||
|
|| *cp != '\0'
|
||
|
|| *code_str != charmap_data.escape_char)
|
||
|
SYNTAX_ERROR;
|
||
|
|
||
|
cp = code_str;
|
||
|
do
|
||
|
{
|
||
|
char *begin;
|
||
|
long val;
|
||
|
|
||
|
switch (*++cp)
|
||
|
{
|
||
|
case 'd':
|
||
|
val = strtol ((begin = cp + 1), &cp, 10);
|
||
|
break;
|
||
|
case 'x':
|
||
|
val = strtol ((begin = cp + 1), &cp, 16);
|
||
|
break;
|
||
|
default:
|
||
|
val = strtol ((begin = cp), &cp, 8);
|
||
|
break;
|
||
|
}
|
||
|
if (begin == cp)
|
||
|
SYNTAX_ERROR;
|
||
|
|
||
|
if (posix_conformance && cp - begin < 2)
|
||
|
error (0, 0, gettext ("%s:%u: byte constant has less than "
|
||
|
"two digits"),
|
||
|
charmap_data.filename, line_no);
|
||
|
|
||
|
if (val < 0 || val > 255)
|
||
|
{
|
||
|
error (0, 0, gettext ("%s:%u: character encoding must be "
|
||
|
"given in 8-bit bytes"),
|
||
|
charmap_data.filename, line_no);
|
||
|
goto end_of_loop;
|
||
|
}
|
||
|
|
||
|
if (cnt < (size_t) charmap_data.mb_cur_max)
|
||
|
{
|
||
|
if (cnt < sizeof (long)) /* FIXME */
|
||
|
char_value = (char_value << 8) | val;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
error (0, 0, gettext ("%s:%u: number of bytes in character "
|
||
|
"definition exceeds `mb_cur_max'"),
|
||
|
charmap_data.filename, line_no);
|
||
|
break;
|
||
|
}
|
||
|
++cnt;
|
||
|
}
|
||
|
while (*cp == charmap_data.escape_char);
|
||
|
|
||
|
/* Ignore the rest of the line (comment). */
|
||
|
if (end2p == NULL)
|
||
|
{
|
||
|
if (insert_entry (&charmap_data.table, name_str,
|
||
|
end1p - name_str, (void *) char_value))
|
||
|
error (0, 0, gettext ("%s:%u: duplicate entry"),
|
||
|
charmap_data.filename, line_no);
|
||
|
|
||
|
max_char = MAX (max_char, char_value);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
char *en1, *en2, *start1p;
|
||
|
long n1, n2, n;
|
||
|
|
||
|
start1p = name_str;
|
||
|
|
||
|
while (*start1p == *start2p && !isdigit (*start1p)
|
||
|
&& start1p < end1p)
|
||
|
++start1p, ++start2p;
|
||
|
|
||
|
n1 = strtol (start1p, &en1, 10);
|
||
|
n2 = strtol (start2p, &en2, 10);
|
||
|
|
||
|
if (en1 - start1p != en2 - start2p || en1 != end1p
|
||
|
|| en2 != end2p)
|
||
|
SYNTAX_ERROR;
|
||
|
|
||
|
if (n1 > n2)
|
||
|
error (0, 0, gettext ("%s:%u: starting character is bigger "
|
||
|
"than last"),
|
||
|
charmap_data.filename, line_no);
|
||
|
|
||
|
n = n1;
|
||
|
while (n <= n2)
|
||
|
{
|
||
|
snprintf(start1p, en1 - start1p, "%0*d", en1 - start1p, n);
|
||
|
|
||
|
if (insert_entry (&charmap_data.table, name_str,
|
||
|
en1 - name_str,
|
||
|
(void *) (char_value + n - n1)))
|
||
|
error (0, 0, gettext ("%s:%u: duplicate entry"),
|
||
|
charmap_data.filename, line_no);
|
||
|
|
||
|
max_char = MAX (max_char, char_value + n - n1);
|
||
|
++n;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if (strncmp (cp, "END CHARMAP", 11) == 0)
|
||
|
return max_char;
|
||
|
|
||
|
SYNTAX_ERROR;
|
||
|
}
|
||
|
end_of_loop:
|
||
|
}
|
||
|
|
||
|
return max_char;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Local Variables:
|
||
|
* mode:c
|
||
|
* c-basic-offset:2
|
||
|
* End:
|
||
|
*/
|