1998-04-01 17:38  Ulrich Drepper  <drepper@cygnus.com>

	* iconv/gconv_simple.c: New builtins for UCS en/decoding.
	* iconv/gconv_builtin.h: Add definitions for new builtins.
	* iconv/gconv.h: Add prototypes for new builtins.

	* iconvdata/Makefile (modules): Add ISO646.
	Add rules for ISO646 module.
	(distribute): Add iso646.c.
	* iconvdata/gconv-modules: Add module and alias definition for
	ISO646 charsets.
	* iconvdata/iso646.c: New file.
This commit is contained in:
Ulrich Drepper 1998-04-01 17:44:34 +00:00
parent 8fe0fd03e5
commit d2374599d4
7 changed files with 573 additions and 7 deletions

View File

@ -1,3 +1,16 @@
1998-04-01 17:38 Ulrich Drepper <drepper@cygnus.com>
* iconv/gconv_simple.c: New builtins for UCS en/decoding.
* iconv/gconv_builtin.h: Add definitions for new builtins.
* iconv/gconv.h: Add prototypes for new builtins.
* iconvdata/Makefile (modules): Add ISO646.
Add rules for ISO646 module.
(distribute): Add iso646.c.
* iconvdata/gconv-modules: Add module and alias definition for
ISO646 charsets.
* iconvdata/iso646.c: New file.
1998-04-01 16:10 Ulrich Drepper <drepper@cygnus.com> 1998-04-01 16:10 Ulrich Drepper <drepper@cygnus.com>
* libc.map: Add __gconv_alias_db, __gconv_nmodules, __gconv_modules_db. * libc.map: Add __gconv_alias_db, __gconv_nmodules, __gconv_modules_db.

View File

@ -1,4 +1,4 @@
/* Copyright (C) 1997 Free Software Foundation, Inc. /* Copyright (C) 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library. This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@ -200,6 +200,8 @@ extern void __gconv_get_builtin_trans __P ((__const char *__name,
__BUILTIN_TRANS (__gconv_transform_dummy); __BUILTIN_TRANS (__gconv_transform_dummy);
__BUILTIN_TRANS (__gconv_transform_ucs4_utf8); __BUILTIN_TRANS (__gconv_transform_ucs4_utf8);
__BUILTIN_TRANS (__gconv_transform_utf8_ucs4); __BUILTIN_TRANS (__gconv_transform_utf8_ucs4);
/* Builtin conversions between UCS2 and UCS4, one per direction.  They are
   declared through __BUILTIN_TRANS like the other builtin transformations
   (the macro's definition is not visible in this excerpt).  */
__BUILTIN_TRANS (__gconv_transform_ucs2_ucs4);
__BUILTIN_TRANS (__gconv_transform_ucs4_ucs2);
# undef __BUITLIN_TRANS # undef __BUITLIN_TRANS
extern int __gconv_transform_init_rstate __P ((struct gconv_step *__step, extern int __gconv_transform_init_rstate __P ((struct gconv_step *__step,

View File

@ -21,17 +21,32 @@
BUILTIN_ALIAS ("UTF8//", "ISO-10646/UTF8/") BUILTIN_ALIAS ("UTF8//", "ISO-10646/UTF8/")
BUILTIN_ALIAS ("UTF-8//", "ISO-10646/UTF8/") BUILTIN_ALIAS ("UTF-8//", "ISO-10646/UTF8/")
BUILTIN_TRANSFORMATION ("([^/]+)/UCS4/([^/]*)", NULL, 0, BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15,
"\\1/UTF8/\\2", 1, "=ucs4->utf8", "ISO-10646/UTF8/", 1, "=ucs4->utf8",
__gconv_transform_ucs4_utf8, __gconv_transform_ucs4_utf8,
__gconv_transform_init_rstate, __gconv_transform_init_rstate,
__gconv_transform_end_rstate) __gconv_transform_end_rstate)
BUILTIN_TRANSFORMATION ("([^/]+)/UTF-?8/([^/]*)", NULL, 0, BUILTIN_TRANSFORMATION ("ISO-10646/UTF-?8/", "ISO-10646/UTF", 13,
"\\1/UCS4/\\2", 1, "=utf8->ucs4", "ISO-10646/UCS4/", 1, "=utf8->ucs4",
__gconv_transform_utf8_ucs4, __gconv_transform_utf8_ucs4,
__gconv_transform_init_rstate, __gconv_transform_init_rstate,
__gconv_transform_end_rstate) __gconv_transform_end_rstate)
/* Short names under which the UCS2 encoding can be requested; both refer
   to "ISO-10646/UCS2/".  */
BUILTIN_ALIAS ("UCS2//", "ISO-10646/UCS2/")
BUILTIN_ALIAS ("UCS-2//", "ISO-10646/UCS2/")

/* UCS2 -> UCS4 builtin, named "=ucs2->ucs4".  The source name is given as
   a constant string (no pattern, hence the NULL first argument); 15 equals
   the length of "ISO-10646/UCS2/".
   NOTE(review): the meaning of the `1' argument is not visible here --
   presumably the cost of the step; confirm against the macro definition.  */
BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS2/", 15, "ISO-10646/UCS4/",
1, "=ucs2->ucs4",
__gconv_transform_ucs2_ucs4,
__gconv_transform_init_rstate,
__gconv_transform_end_rstate)

/* UCS4 -> UCS2 builtin, named "=ucs4->ucs2"; the reverse of the entry
   above, using the same init/end functions.  */
BUILTIN_TRANSFORMATION (NULL, "ISO-10646/UCS4/", 15, "ISO-10646/UCS2/",
1, "=ucs4->ucs2",
__gconv_transform_ucs4_ucs2,
__gconv_transform_init_rstate,
__gconv_transform_end_rstate)
BUILTIN_TRANSFORMATION ("(.*)", NULL, 0, "\\1", 1, "=dummy", BUILTIN_TRANSFORMATION ("(.*)", NULL, 0, "\\1", 1, "=dummy",
__gconv_transform_dummy, NULL, NULL) __gconv_transform_dummy, NULL, NULL)

View File

@ -20,6 +20,7 @@
#include <errno.h> #include <errno.h>
#include <gconv.h> #include <gconv.h>
#include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <wchar.h> #include <wchar.h>
@ -289,3 +290,253 @@ __gconv_transform_utf8_ucs4 (struct gconv_step *step,
return result; return result;
} }
/* Builtin conversion step: UCS2 -> UCS4.

   Reads 16-bit units from INBUF (read as native `uint16_t'; *INLEN bytes
   are available) and stores each one, widened to `wchar_t', in DATA's
   output buffer.  *INLEN is decremented by the number of bytes consumed.
   Unless DATA is the last step the filled buffer is handed to the next
   step in the chain and the loop continues while input remains and the
   next step reports GCONV_EMPTY_INPUT.

   A single trailing byte cannot form a character; it is saved in the
   step's `mbstate_t' and GCONV_INCOMPLETE_INPUT is returned.

   If DO_FLUSH is nonzero no input is converted: the saved state is
   cleared and, unless this is the last step, the flush request is
   forwarded and the pending output of this step is dropped.

   Returns one of the GCONV_* status codes.  When DATA is the last step
   and WRITTEN is not NULL, *WRITTEN receives the number of output bytes
   produced by this call.
   NOTE(review): __gconv_transform_ucs4_ucs2 reports a character count in
   *WRITTEN while this function reports bytes; kept as is because the
   callers are not visible here.  */
int
__gconv_transform_ucs2_ucs4 (struct gconv_step *step,
                             struct gconv_step_data *data, const char *inbuf,
                             size_t *inlen, size_t *written, int do_flush)
{
  struct gconv_step *next_step = step + 1;
  struct gconv_step_data *next_data = data + 1;
  size_t do_write = 0;
  int result;

  if (do_flush)
    {
      /* Reset to the initial state; partly converted input is dropped.  */
      memset (data->data, '\0', sizeof (mbstate_t));

      if (data->is_last)
        result = GCONV_OK;
      else
        {
          /* The next step is only looked at when there is one.  */
          result = (*next_step->fct) (next_step, next_data, NULL, NULL,
                                      written, 1);

          /* Clear output buffer, as the sibling conversions do.  */
          data->outbufavail = 0;
        }
    }
  else
    {
      do
        {
          const uint16_t *newinbuf = (const uint16_t *) inbuf;
          wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail];
          size_t actually = 0;

          /* Every 16-bit value fits into a `wchar_t', so this loop cannot
             fail; it stops when either buffer is exhausted.  */
          while (data->outbufavail + sizeof (wchar_t) <= data->outbufsize
                 && *inlen >= sizeof (uint16_t))
            {
              outbuf[actually++] = *newinbuf++;
              data->outbufavail += sizeof (wchar_t);
              *inlen -= sizeof (uint16_t);
            }

          if (*inlen == 1)
            {
              /* We have an incomplete input character: remember the lone
                 byte.  This must only happen when exactly one byte is
                 left; otherwise *INLEN would wrap around or a byte of a
                 complete character would be swallowed.  */
              mbstate_t *state = (mbstate_t *) data->data;
              state->count = 1;
              state->value = *(const uint8_t *) newinbuf;
              --*inlen;
            }

          /* Continue behind the consumed input on the next pass instead
             of converting the start of the buffer again.  */
          inbuf = (const char *) newinbuf;

          /* Remember how much we converted.  */
          do_write += actually * sizeof (wchar_t);

          if (*inlen == 0 && !mbsinit ((mbstate_t *) data->data))
            {
              /* We have an incomplete character at the end.  */
              result = GCONV_INCOMPLETE_INPUT;
              break;
            }

          if (data->is_last)
            {
              /* This is the last step.  */
              result = (data->outbufavail + sizeof (wchar_t)
                        > data->outbufsize
                        ? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT);
              break;
            }

          /* Status so far.  */
          result = GCONV_EMPTY_INPUT;

          if (data->outbufavail > 0)
            {
              /* Call the functions below in the chain.  */
              size_t newavail = data->outbufavail;

              result = (*next_step->fct) (next_step, next_data, data->outbuf,
                                          &newavail, written, 0);

              /* Correct the output buffer: keep only what the next step
                 left unconsumed.  This includes the case where it took
                 everything, in which the buffer becomes empty.  */
              if (newavail != data->outbufavail)
                {
                  if (newavail > 0)
                    memmove (data->outbuf,
                             &data->outbuf[data->outbufavail - newavail],
                             newavail);
                  data->outbufavail = newavail;
                }
            }
        }
      while (*inlen > 0 && result == GCONV_EMPTY_INPUT);
    }

  if (written != NULL && data->is_last)
    *written = do_write;

  return result;
}
/* Builtin conversion step: UCS4 -> UCS2.

   Reads `wchar_t' values from INBUF (*INLEN bytes are available) and
   stores each one as a 16-bit unit in DATA's output buffer.  *INLEN is
   decremented by the number of bytes consumed.  A value that does not fit
   into 16 bits stops the conversion with GCONV_ILLEGAL_INPUT.  Unless
   DATA is the last step the filled buffer is handed to the next step in
   the chain and the loop continues while input remains and the next step
   reports GCONV_EMPTY_INPUT.

   Fewer than sizeof (wchar_t) trailing bytes cannot form a character;
   they are accumulated in the step's `mbstate_t' and
   GCONV_INCOMPLETE_INPUT is returned.

   If DO_FLUSH is nonzero no input is converted: the saved state is
   cleared and, unless this is the last step, the flush request is
   forwarded and the pending output of this step is dropped.

   Returns one of the GCONV_* status codes.  When DATA is the last step
   and WRITTEN is not NULL, *WRITTEN receives the number of complete
   characters consumed by this call.  */
int
__gconv_transform_ucs4_ucs2 (struct gconv_step *step,
                             struct gconv_step_data *data, const char *inbuf,
                             size_t *inlen, size_t *written, int do_flush)
{
  struct gconv_step *next_step = step + 1;
  struct gconv_step_data *next_data = data + 1;
  size_t do_write = 0;
  int result;

  if (do_flush)
    {
      /* Reset to the initial state; partly converted input is dropped.  */
      memset (data->data, '\0', sizeof (mbstate_t));

      if (data->is_last)
        result = GCONV_OK;
      else
        {
          /* The next step is only looked at when there is one.  */
          result = (*next_step->fct) (next_step, next_data, NULL, NULL,
                                      written, 1);

          /* Clear output buffer.  */
          data->outbufavail = 0;
        }
    }
  else
    {
      int save_errno = errno;

      do
        {
          const wchar_t *newinbuf = (const wchar_t *) inbuf;
          uint16_t *outbuf = (uint16_t *) &data->outbuf[data->outbufavail];
          size_t actually = 0;

          errno = 0;

          while (data->outbufavail + sizeof (uint16_t) <= data->outbufsize
                 && *inlen >= sizeof (wchar_t))
            {
              /* Compare as unsigned so that negative values of a signed
                 `wchar_t' are rejected as well instead of being
                 truncated.  */
              if ((uint32_t) *newinbuf >= 0x10000)
                {
                  __set_errno (EILSEQ);
                  break;
                }

              /* Advance the input pointer; otherwise the first character
                 would be emitted over and over.  */
              outbuf[actually++] = (uint16_t) *newinbuf++;
              *inlen -= sizeof (wchar_t);
              data->outbufavail += sizeof (uint16_t);
            }

          if (*inlen < sizeof (wchar_t))
            {
              /* We have an incomplete input character (or none at all, in
                 which case the state simply becomes the initial one).
                 Collect the leftover bytes, first byte most significant.  */
              mbstate_t *state = (mbstate_t *) data->data;
              const uint8_t *tail = (const uint8_t *) newinbuf;

              state->count = *inlen;
              state->value = 0;
              while (*inlen > 0)
                {
                  state->value <<= 8;
                  state->value += *tail++;
                  --*inlen;
                }
            }

          /* Remember how much we converted (in input bytes), then
             continue behind the consumed input on the next pass.  */
          do_write += (const char *) newinbuf - inbuf;
          inbuf = (const char *) newinbuf;

          /* Check whether an illegal character appeared.  */
          if (errno != 0)
            {
              result = GCONV_ILLEGAL_INPUT;
              break;
            }

          if (*inlen == 0 && !mbsinit ((mbstate_t *) data->data))
            {
              /* We have an incomplete character at the end.  */
              result = GCONV_INCOMPLETE_INPUT;
              break;
            }

          if (data->is_last)
            {
              /* This is the last step.  */
              result = *inlen == 0 ? GCONV_EMPTY_INPUT : GCONV_FULL_OUTPUT;
              break;
            }

          /* Status so far.  */
          result = GCONV_EMPTY_INPUT;

          if (data->outbufavail > 0)
            {
              /* Call the functions below in the chain.  */
              size_t newavail = data->outbufavail;

              result = (*next_step->fct) (next_step, next_data, data->outbuf,
                                          &newavail, written, 0);

              /* Correct the output buffer: keep only what the next step
                 left unconsumed.  This includes the case where it took
                 everything, in which the buffer becomes empty.  */
              if (newavail != data->outbufavail)
                {
                  if (newavail > 0)
                    memmove (data->outbuf,
                             &data->outbuf[data->outbufavail - newavail],
                             newavail);
                  data->outbufavail = newavail;
                }
            }
        }
      while (*inlen > 0 && result == GCONV_EMPTY_INPUT);

      __set_errno (save_errno);
    }

  if (written != NULL && data->is_last)
    *written = do_write / sizeof (wchar_t);

  return result;
}

View File

@ -26,7 +26,7 @@ modules := ISO8859-1 ISO8859-2 ISO8859-3 ISO8859-4 ISO8859-5 \
ISO8859-6 ISO8859-7 ISO8859-8 ISO8859-9 ISO8859-10 \ ISO8859-6 ISO8859-7 ISO8859-8 ISO8859-9 ISO8859-10 \
T.61 ISO_6937 SJIS KOI-8 KOI8-R LATIN-GREEK LATIN-GREEK-1 \ T.61 ISO_6937 SJIS KOI-8 KOI8-R LATIN-GREEK LATIN-GREEK-1 \
HP-ROMAN8 EBCDIC-AT-DE EBCDIC-AT-DE-A EBCDIC-CA-FR \ HP-ROMAN8 EBCDIC-AT-DE EBCDIC-AT-DE-A EBCDIC-CA-FR \
EUC-KR UHC JOHAB libJIS libKSC EUC-KR UHC JOHAB libJIS libKSC ISO646
modules.so := $(addsuffix .so, $(modules)) modules.so := $(addsuffix .so, $(modules))
@ -42,6 +42,7 @@ ISO8859-7-routines := iso8859-7
ISO8859-8-routines := iso8859-8 ISO8859-8-routines := iso8859-8
ISO8859-9-routines := iso8859-9 ISO8859-9-routines := iso8859-9
ISO8859-10-routines := iso8859-10 ISO8859-10-routines := iso8859-10
ISO646-routines := iso646
T.61-routines := t61 T.61-routines := t61
ISO_6937-routines := iso6937 ISO_6937-routines := iso6937
SJIS-routines := sjis SJIS-routines := sjis
@ -74,7 +75,8 @@ distribute := 8bit-generic.c 8bit-gap.c gap.pl gaptab.pl gconv-modules \
koi-8.c koi8-r.c koi8-r.h hp-roman8.c latin-greek.c \ koi-8.c koi8-r.c koi8-r.h hp-roman8.c latin-greek.c \
latin-greek.h latin-greek-1.c latin-greek-1.h ebcdic-at-de.c \ latin-greek.h latin-greek-1.c latin-greek-1.h ebcdic-at-de.c \
ebcdic-at-de-a.c ebcdic-ca-fr.c jis0201.c jis0208.c jis0212.c \ ebcdic-at-de-a.c ebcdic-ca-fr.c jis0201.c jis0208.c jis0212.c \
extra-module.mk euckr.c johab.c uhc.c ksc5601.c ksc5601.h extra-module.mk euckr.c johab.c uhc.c ksc5601.c ksc5601.h \
iso646.c
# We build the transformation modules only when we build shared libs. # We build the transformation modules only when we build shared libs.
ifeq (yes,$(build-shared)) ifeq (yes,$(build-shared))

View File

@ -38,6 +38,26 @@ alias ISO-10646// ISO-10646/UCS4/
alias 10646-1:1993// ISO-10646/UCS4/ alias 10646-1:1993// ISO-10646/UCS4/
alias 10646-1:1993/UCS4/ ISO-10646/UCS4/ alias 10646-1:1993/UCS4/ ISO-10646/UCS4/
# from to module cost
alias ISO-IR-6// ANSI_X3.4-1968//
alias ANSI_X3.4-1986// ANSI_X3.4-1968//
alias ISO_646.IRV:1991// ANSI_X3.4-1968//
alias ASCII// ANSI_X3.4-1968//
alias ISO646-US// ANSI_X3.4-1968//
alias US-ASCII// ANSI_X3.4-1968//
alias US// ANSI_X3.4-1968//
alias IBM367// ANSI_X3.4-1968//
alias CP367// ANSI_X3.4-1968//
module ANSI_X3.4-1968// ISO-10646/UCS4/ ISO646 1
module ISO-10646/UCS4/ ANSI_X3.4-1968// ISO646 1
alias ISO-IR-4// BS_4730//
alias ISO646-GB// BS_4730//
alias GB// BS_4730//
alias UK// BS_4730//
module BS_4730// ISO-10646/UCS4/ ISO646 1
module ISO-10646/UCS4/ BS_4730// ISO646 1
# from to module cost # from to module cost
alias ISO-IR-100// ISO-8859-1// alias ISO-IR-100// ISO-8859-1//
alias ISO_8859-1:1987// ISO-8859-1// alias ISO_8859-1:1987// ISO-8859-1//

263
iconvdata/iso646.c Normal file
View File

@ -0,0 +1,263 @@
/* Conversion to and from the various ISO 646 CCS.
Copyright (C) 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <gconv.h>
#include <stdlib.h>
#include <string.h>
/* Which way a conversion step runs, seen from the ISO 646 side.
   `illegal' marks a step for which neither end is a charset handled
   by this module.  */
enum direction
{
  illegal = 0,
  to_iso646 = 1,
  from_iso646 = 2
};
/* The ISO 646 variants this module knows about.  */
enum variant
{
  /* ANSI_X3.4-1968 */
  US = 0,
  /* BS_4730 */
  GB = 1
};
/* Per-step private data: allocated and filled in by gconv_init, read by
   gconv on every conversion call and released by gconv_end.  */
struct iso646_data
{
/* Whether the step converts to or from the ISO 646 charset.  */
enum direction dir;
/* Which ISO 646 variant is on the ISO 646 side of the step.  */
enum variant var;
};
int
gconv_init (struct gconv_step *step, struct gconv_step_data *data)
{
/* Determine which direction. */
struct iso646_data *new_data;
enum direction dir;
enum variant var;
int result;
if (strcasestr (step->from_name, "ANSI_X3.4-1968") != NULL)
{
dir = from_iso646;
var = US;
}
else if (strcasestr (step->from_name, "BS_4730") != NULL)
{
dir = from_iso646;
var = GB;
}
else if (strcasestr (step->to_name, "ANSI_X3.4-1968") != NULL)
{
dir = to_iso646;
var = US;
}
else if (strcasestr (step->to_name, "BS_4730") != NULL)
{
dir = to_iso646;
var = GB;
}
else
dir = illegal;
result = GCONV_NOCONV;
if (dir != illegal
&& ((new_data
= (struct iso646_data *) malloc (sizeof (struct iso646_data)))
!= NULL))
{
new_data->dir = dir;
new_data->var = var;
data->data = new_data;
result = GCONV_OK;
}
return result;
}
/* Release the private data which gconv_init attached to DATA.  */
void
gconv_end (struct gconv_step_data *data)
{
  void *priv = data->data;

  free (priv);
}
int
gconv (struct gconv_step *step, struct gconv_step_data *data,
const char *inbuf, size_t *inbufsize, size_t *written, int do_flush)
{
struct gconv_step *next_step = step + 1;
struct gconv_step_data *next_data = data + 1;
gconv_fct fct = next_step->fct;
size_t do_write;
int result;
/* If the function is called with no input this means we have to reset
to the initial state. The possibly partly converted input is
dropped. */
if (do_flush)
{
do_write = 0;
/* Call the steps down the chain if there are any. */
if (data->is_last)
result = GCONV_OK;
else
{
struct gconv_step *next_step = step + 1;
struct gconv_step_data *next_data = data + 1;
result = (*fct) (next_step, next_data, NULL, 0, written, 1);
/* Clear output buffer. */
data->outbufavail = 0;
}
}
else
{
enum direction dir = ((struct iso646_data *) data->data)->dir;
enum variant var = ((struct iso646_data *) data->data)->var;
do_write = 0;
do
{
result = GCONV_OK;
if (dir == from_iso646)
{
size_t inchars = *inbufsize;
size_t outwchars = data->outbufavail;
char *outbuf = data->outbuf;
size_t cnt = 0;
while (cnt < inchars
&& (outwchars + sizeof (wchar_t) <= data->outbufsize))
{
switch ((unsigned char) inbuf[cnt])
{
case '\x23':
if (var == GB)
*((wchar_t *) (outbuf + outwchars)) = 0xa3;
else
*((wchar_t *) (outbuf + outwchars)) = 0x23;
break;
case '\x75':
if (var == GB)
*((wchar_t *) (outbuf + outwchars)) = 0x203e;
else
*((wchar_t *) (outbuf + outwchars)) = 0x75;
break;
default:
*((wchar_t *) (outbuf + outwchars)) =
(unsigned char) inbuf[cnt];
}
++do_write;
outwchars += sizeof (wchar_t);
++cnt;
}
*inbufsize -= cnt;
data->outbufavail = outwchars;
}
else
{
size_t inwchars = *inbufsize;
size_t outchars = data->outbufavail;
char *outbuf = data->outbuf;
size_t cnt = 0;
while (inwchars >= cnt + sizeof (wchar_t)
&& outchars < data->outbufsize)
{
if (*((wchar_t *) (inbuf + cnt)) >= L'\0'
&& *((wchar_t *) (inbuf + cnt)) <= L'\177')
outbuf[outchars] = *((wchar_t *) (inbuf + cnt));
else
/* Here is where the transliteration would enter the
scene. */
break;
++do_write;
++outchars;
cnt += sizeof (wchar_t);
}
*inbufsize -= cnt;
data->outbufavail = outchars;
if (outchars < data->outbufsize)
{
/* If there is still room in the output buffer something
is wrong with the input. */
if (inwchars >= cnt + sizeof (wchar_t))
{
/* An error occurred. */
result = GCONV_ILLEGAL_INPUT;
break;
}
if (inwchars != cnt)
{
/* There are some unprocessed bytes at the end of the
input buffer. */
result = GCONV_INCOMPLETE_INPUT;
break;
}
}
}
if (result != GCONV_OK)
break;
if (data->is_last)
{
/* This is the last step. */
result = (*inbufsize > (dir == from_iso646
? 0 : sizeof (wchar_t) - 1)
? GCONV_FULL_OUTPUT : GCONV_EMPTY_INPUT);
break;
}
/* Status so far. */
result = GCONV_EMPTY_INPUT;
if (data->outbufavail > 0)
{
/* Call the functions below in the chain. */
size_t newavail = data->outbufavail;
result = (*fct) (next_step, next_data, data->outbuf, &newavail,
written, 0);
/* Correct the output buffer. */
if (newavail != data->outbufavail && newavail > 0)
{
memmove (data->outbuf,
&data->outbuf[data->outbufavail - newavail],
newavail);
data->outbufavail = newavail;
}
}
}
while (*inbufsize > 0 && result == GCONV_EMPTY_INPUT);
}
if (written != NULL && data->is_last)
*written = do_write;
return result;
}