/* glibc/iconvdata/iso-2022-jp.c */

/* Conversion module for ISO-2022-JP.
   Last recorded change (1999-01-11, Ulrich Drepper): gconv_init now
   correctly initializes the max_needed_to element.
Copyright (C) 1998, 1999 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <gconv.h>
#include <stdint.h>
/* <stdlib.h> provides malloc and free, used by gconv_init and gconv_end.  */
#include <stdlib.h>
#include <string.h>
#include "jis0201.h"
#include "jis0208.h"
#include "jis0212.h"
#include "gb2312.h"
#include "ksc5601.h"
/* One contiguous range of UCS4 code points, used to search the tables
   from "iso8859-7jp.h" (included right after this definition).  The
   conversion code walks an array of these until CH <= END, checks
   CH >= START and then uses CH - 0xa0 + IDX as index into
   iso88597_from_ucs4.  */
struct gap
{
  uint16_t start;	/* First code point covered by this range.  */
  uint16_t end;		/* Last code point covered by this range.  */
  int32_t idx;		/* Offset added to CH - 0xa0 to get the table index.  */
};
#include "iso8859-7jp.h"
/* This makes obvious what everybody knows: 0x1b is the Esc character. */
#define ESC 0x1b
/* We provide our own initialization and destructor function (gconv_init
   and gconv_end in this file), so both switches are set to 0. */
#define DEFINE_INIT 0
#define DEFINE_FINI 0
/* Definitions used in the body of the `gconv' function.  FROM_LOOP and
   TO_LOOP are the names given to the two loop functions through LOOPFCT
   further down.  The *_FROM values are byte counts on the ISO-2022-JP
   side (between 1 and 4), the *_TO values byte counts on the UCS4 side
   (always 4, one uint32_t per character).  FROM_DIRECTION tests the
   local variable `dir' declared by PREPARE_LOOP. */
#define FROM_LOOP from_iso2022jp_loop
#define TO_LOOP to_iso2022jp_loop
#define MIN_NEEDED_FROM 1
#define MAX_NEEDED_FROM 4
#define MIN_NEEDED_TO 4
#define MAX_NEEDED_TO 4
#define FROM_DIRECTION (dir == from_iso2022jp)
/* Local declarations for the code that expands this macro; that code
   must have `step' and `data' in scope (presumably the function generated
   by <iconv/skeleton.c> -- confirm there).  `dir' and `var' are read from
   the private data stored by gconv_init, `save_set' is the scratch
   variable used by SAVE_RESET_STATE, and `setp' points to the `count'
   member of the conversion state. */
#define PREPARE_LOOP \
enum direction dir = ((struct iso2022jp_data *) step->data)->dir; \
enum variant var = ((struct iso2022jp_data *) step->data)->var; \
int save_set; \
int *setp = &data->statep->count;
/* Additional arguments handed to the loop functions; they match the
   parameters declared by EXTRA_LOOP_DECLS. */
#define EXTRA_LOOP_ARGS , var, setp
/* Which way a conversion step runs.  illegal_dir is the initial value
   gconv_init uses before it has recognized a charset name.  */
enum direction
{
  illegal_dir = 0,
  to_iso2022jp = 1,
  from_iso2022jp = 2
};
/* The encodings implemented by this module: ISO-2022-JP and
   ISO-2022-JP-2.  illegal_var is the initial value gconv_init uses
   before it has recognized a charset name.  */
enum variant
{
  illegal_var = 0,
  iso2022jp = 1,
  iso2022jp2 = 2
};
/* Private per-step data.  It is allocated and filled in by gconv_init,
   read back through step->data by PREPARE_LOOP and EMIT_SHIFT_TO_INIT,
   and released by gconv_end.  */
struct iso2022jp_data
{
  enum direction dir;	/* Direction of this conversion step.  */
  enum variant var;	/* ISO-2022-JP or ISO-2022-JP-2.  */
};
/* The COUNT element of the conversion state records the currently
   selected character set.  INIT_PARAMS extracts it as COUNT % 0x100.
   ASCII_set must remain 0: EMIT_SHIFT_TO_INIT treats a COUNT equal to
   ASCII_set as the initial state.  The possible values are:  */
enum
{
  ASCII_set = 0,
  JISX0208_1978_set = 1,
  JISX0208_1983_set = 2,
  JISX0201_Roman_set = 3,
  JISX0201_Kana_set = 4,
  GB2312_set = 5,
  KSC5601_set = 6,
  JISX0212_set = 7
};
/* The second value kept in the state is the designation of the G2 set.
   INIT_PARAMS extracts it as COUNT / 0x100 and UPDATE_PARAMS stores it
   back shifted left by 8 bits.  The possible values are:  */
enum
{
  UNSPECIFIED_set = 0,
  ISO88591_set = 1,
  ISO88597_set = 2
};
int
gconv_init (struct gconv_step *step)
{
/* Determine which direction. */
struct iso2022jp_data *new_data;
enum direction dir = illegal_dir;
enum variant var = illegal_var;
int result;
if (__strcasecmp (step->from_name, "ISO-2022-JP//") == 0)
{
dir = from_iso2022jp;
var = iso2022jp;
}
else if (__strcasecmp (step->to_name, "ISO-2022-JP//") == 0)
{
dir = to_iso2022jp;
var = iso2022jp;
}
else if (__strcasecmp (step->from_name, "ISO-2022-JP-2//") == 0)
{
dir = from_iso2022jp;
var = iso2022jp2;
}
else if (__strcasecmp (step->to_name, "ISO-2022-JP-2//") == 0)
{
dir = to_iso2022jp;
var = iso2022jp2;
}
result = GCONV_NOCONV;
if (dir != illegal_dir)
{
new_data
= (struct iso2022jp_data *) malloc (sizeof (struct iso2022jp_data));
result = GCONV_NOMEM;
if (new_data != NULL)
{
new_data->dir = dir;
new_data->var = var;
step->data = new_data;
if (dir == from_iso2022jp)
{
step->min_needed_from = MIN_NEEDED_FROM;
step->max_needed_from = MAX_NEEDED_FROM;
step->min_needed_to = MIN_NEEDED_TO;
Update. 1999-01-11 Ulrich Drepper <drepper@cygnus.com> * ctype/Versions [GLIBC_2.0]: Export __ctype32_b. * include/wctype.h: Declare __iswctype. * stdio-common/vfscanf.c (__vfscanf): Use __iswspace instead of iswspace. * wctype/Makefile (routines): Add wcextra_l. * wctype/wcextra.c (iswblank): Implement function here and don't use __iswctype. (__iswblank_l): Move definition to... * wctype/wcextra_l.c: ...here. New file. * wctype/wcfuncs.c: Really implement functions and don't call __iswctype or __towctrans. * wctype/wctype.h: Change isw* and tow* macros. Don't call __iswctype or __towctrans. Instead optimize constant argument case. * iconv/gconv.h: Fix typos. * iconv/skeleton.c: Fix typos. Optimize init function a bit. Correctly emit escape sequence to return to initial state in conversion function. * iconvdata/iso-2022-jp.c (gconv_init): Correctly initialize max_needed_to element. * manual/mbyte.texi: Removed. This is now described in charset.texi. * manual/charset.texi: New file. * manual/Makefile (chapters): Replace mbyte by charset. * manual/ctype.texi: Document wide character functions. * manual/intro.texi: Fix reference to mbyte chapter. * manual/lang.texi: Likewise. * manual/locale.texi: Likewise. * manual/stdio.texi: Likewise. * manual/string.texi: Fix @node line for new charset chapter. * manual/libc.texinfo (UPDATED): Updated. Also update copyright years. * manual/memory.texi (savestring): Optimize code to give a good example. * manual/filesys.texi: Fix wording. Patches by Jim Meyering. * nscd/nscd_getgr_r.c: Include stdint.h to get uintptr_t definition. * nscd/nscd_getpw_r.c: Likewise. * nscd/nscd_gethst_r.c: Likewise. * stdlib/stdtold_l.c: Always include xlocale.h. 1999-01-11 Geoffrey Keating <geoffk@ozemail.com.au> * stdlib/fpioconst.h (LDBL_MAX_10_EXP_LOG): Define to be same as DBL_MAX_10_EXP_LOG if there is no long double. (_fpioconst_pow10): Always use size as LDBL_MAX_10_EXP_LOG to match printf_fp.c. 
1999-01-10 Andreas Jaeger <aj@arthur.rhein-neckar.de> * timezone/Makefile ($(testdata)/GB): Changed to ... ($(testdata)/Europe/London): ... for tst-timezone test. ($(objpfx)tst-timezone.out): Change GB to Europe/London. * timezone/tst-timezone.c (main): Enable DST switching test, change GB to Europe/London. 1999-01-10 Philip Blundell <philb@gnu.org> * socket/Makefile (headers): Remove bits/sockunion.h. 1999-01-09 Philip Blundell <philb@gnu.org> * socket/sys/socket.h: Don't include <bits/sockunion.h>. * sysdeps/generic/bits/sockunion.h: Deleted. * sysdeps/unix/sysv/linux/bits/sockunion.h: Likewise. 1999-01-08 H.J. Lu <hjl@gnu.org> * io/fts.c (fts_close): Don't access memory after having it freed.
1999-01-11 20:13:43 +00:00
step->max_needed_to = MAX_NEEDED_TO;
}
else
{
step->min_needed_from = MIN_NEEDED_TO;
step->max_needed_from = MAX_NEEDED_TO;
step->min_needed_to = MIN_NEEDED_FROM;
Update. 1999-01-11 Ulrich Drepper <drepper@cygnus.com> * ctype/Versions [GLIBC_2.0]: Export __ctype32_b. * include/wctype.h: Declare __iswctype. * stdio-common/vfscanf.c (__vfscanf): Use __iswspace instead of iswspace. * wctype/Makefile (routines): Add wcextra_l. * wctype/wcextra.c (iswblank): Implement function here and don't use __iswctype. (__iswblank_l): Move definition to... * wctype/wcextra_l.c: ...here. New file. * wctype/wcfuncs.c: Really implement functions and don't call __iswctype or __towctrans. * wctype/wctype.h: Change isw* and tow* macros. Don't call __iswctype or __towctrans. Instead optimize constant argument case. * iconv/gconv.h: Fix typos. * iconv/skeleton.c: Fix typos. Optimize init function a bit. Correctly emit escape sequence to return to initial state in conversion function. * iconvdata/iso-2022-jp.c (gconv_init): Correctly initialize max_needed_to element. * manual/mbyte.texi: Removed. This is now described in charset.texi. * manual/charset.texi: New file. * manual/Makefile (chapters): Replace mbyte by charset. * manual/ctype.texi: Document wide character functions. * manual/intro.texi: Fix reference to mbyte chapter. * manual/lang.texi: Likewise. * manual/locale.texi: Likewise. * manual/stdio.texi: Likewise. * manual/string.texi: Fix @node line for new charset chapter. * manual/libc.texinfo (UPDATED): Updated. Also update copyright years. * manual/memory.texi (savestring): Optimize code to give a good example. * manual/filesys.texi: Fix wording. Patches by Jim Meyering. * nscd/nscd_getgr_r.c: Include stdint.h to get uintptr_t definition. * nscd/nscd_getpw_r.c: Likewise. * nscd/nscd_gethst_r.c: Likewise. * stdlib/stdtold_l.c: Always include xlocale.h. 1999-01-11 Geoffrey Keating <geoffk@ozemail.com.au> * stdlib/fpioconst.h (LDBL_MAX_10_EXP_LOG): Define to be same as DBL_MAX_10_EXP_LOG if there is no long double. (_fpioconst_pow10): Always use size as LDBL_MAX_10_EXP_LOG to match printf_fp.c. 
1999-01-10 Andreas Jaeger <aj@arthur.rhein-neckar.de> * timezone/Makefile ($(testdata)/GB): Changed to ... ($(testdata)/Europe/London): ... for tst-timezone test. ($(objpfx)tst-timezone.out): Change GB to Europe/London. * timezone/tst-timezone.c (main): Enable DST switching test, change GB to Europe/London. 1999-01-10 Philip Blundell <philb@gnu.org> * socket/Makefile (headers): Remove bits/sockunion.h. 1999-01-09 Philip Blundell <philb@gnu.org> * socket/sys/socket.h: Don't include <bits/sockunion.h>. * sysdeps/generic/bits/sockunion.h: Deleted. * sysdeps/unix/sysv/linux/bits/sockunion.h: Likewise. 1999-01-08 H.J. Lu <hjl@gnu.org> * io/fts.c (fts_close): Don't access memory after having it freed.
1999-01-11 20:13:43 +00:00
step->max_needed_to = MAX_NEEDED_FROM + 2;
}
/* Yes, this is a stateful encoding. */
step->stateful = 1;
result = GCONV_OK;
}
}
return result;
}
/* Destructor for a conversion step: release the private data that
   gconv_init attached to the step.  Note that the parameter named DATA
   is the step itself; the allocation is its `data' member.  */
void
gconv_end (struct gconv_step *data)
{
  void *private_data = data->data;

  free (private_data);
}
/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state. This has to be done during the
   flushing.

   The macro does nothing when the state's `count' already equals
   ASCII_set.  Otherwise, when converting from ISO-2022-JP, it only resets
   `count'.  When converting to ISO-2022-JP it appends the three bytes
   `Esc ( B' to data->outbuf and resets `count', or, if fewer than three
   bytes are free before data->outbufend, sets `status' to
   GCONV_FULL_OUTPUT and leaves buffer and state untouched.

   The expansion uses `data', `step' and `status' from the surrounding
   code (presumably the function generated by <iconv/skeleton.c> --
   confirm there).  It is a bare `if' statement, not wrapped in
   do { } while (0). */
#define EMIT_SHIFT_TO_INIT \
if (data->statep->count != ASCII_set) \
{ \
enum direction dir = ((struct iso2022jp_data *) step->data)->dir; \
\
if (dir == from_iso2022jp) \
/* It's easy, we don't have to emit anything, we just reset the \
state for the input. Note that this also clears the G2 \
designation. */ \
data->statep->count = ASCII_set; \
else \
{ \
char *outbuf = data->outbuf; \
\
/* We are not in the initial state. To switch back we have \
to emit the sequence `Esc ( B'. */ \
if (outbuf + 3 > data->outbufend) \
/* We don't have enough room in the output buffer. */ \
status = GCONV_FULL_OUTPUT; \
else \
{ \
/* Write out the shift sequence. */ \
*outbuf++ = ESC; \
*outbuf++ = '('; \
*outbuf++ = 'B'; \
data->outbuf = outbuf; \
/* Note that this also clears the G2 designation. */ \
data->statep->count = ASCII_set; \
} \
} \
}
/* Since we might have to reset the input pointer we must be able to save
   and restore the state.  With a nonzero SAVE argument the current value
   of *setp is copied into save_set; with a zero argument *setp is set
   back to save_set.  Both variables are declared by PREPARE_LOOP.  The
   expansion ends without a semicolon, so the use site has to supply
   one. */
#define SAVE_RESET_STATE(Save) \
if (Save) \
save_set = *setp; \
else \
*setp = save_set
/* First define the conversion function from ISO-2022-JP to UCS4.  These
   macros parameterize the <iconv/loop.c> inclusion below: the input is
   the ISO-2022-JP side (1 to 4 bytes), the output the UCS4 side (4
   bytes), and the generated function is named FROM_LOOP, i.e.
   from_iso2022jp_loop. */
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
#define LOOPFCT FROM_LOOP
/* Loop body for the ISO-2022-JP -> UCS4 direction.  It is expanded inside
   a loop (the code uses `break' and `continue') that provides `inptr',
   `inend', `outptr' and `result', together with `var', `set' and `set2'
   from EXTRA_LOOP_DECLS and INIT_PARAMS.

   One expansion either
     - consumes a designation escape sequence, updates `set' or `set2'
       and restarts the loop with `continue', or
     - decodes one character, advances `inptr' past it and stores the
       UCS4 value as one uint32_t at `outptr', or
     - sets `result' to GCONV_EMPTY_INPUT or GCONV_ILLEGAL_INPUT and
       leaves the loop with `break' without consuming anything.

   An ESC that does not start a recognized sequence falls through and is
   passed on like any other control character.

   NOTE(review): bytes above 0x7f are not rejected here although
   ISO-2022-JP is a 7-bit encoding; in the ASCII set they are copied
   through unchanged.  Confirm whether that is intended.  */
#define BODY \
  { \
    uint32_t ch = *inptr; \
    \
    /* Recognize escape sequences.  */ \
    if (ch == ESC) \
      { \
	/* We now must be prepared to read two to three more \
	   characters.  If we have a match in the first character but \
	   then the input buffer ends we terminate with an error since \
	   we must not risk missing an escape sequence just because it \
	   is not entirely in the current input buffer.  */ \
	if (inptr + 2 >= inend \
	    || (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '(' \
		&& inptr + 3 >= inend)) \
	  { \
	    /* Not enough input available.  */ \
	    result = GCONV_EMPTY_INPUT; \
	    break; \
	  } \
	\
	if (inptr[1] == '(') \
	  { \
	    if (inptr[2] == 'B') \
	      { \
		/* ASCII selected.  */ \
		set = ASCII_set; \
		inptr += 3; \
		continue; \
	      } \
	    else if (inptr[2] == 'J') \
	      { \
		/* JIS X 0201 Roman selected.  */ \
		set = JISX0201_Roman_set; \
		inptr += 3; \
		continue; \
	      } \
	    else if (var == iso2022jp2 && inptr[2] == 'I') \
	      { \
		/* JIS X 0201 Katakana selected.  */ \
		set = JISX0201_Kana_set; \
		inptr += 3; \
		continue; \
	      } \
	  } \
	else if (inptr[1] == '$') \
	  { \
	    if (inptr[2] == '@') \
	      { \
		/* JIS X 0208-1978 selected.  */ \
		set = JISX0208_1978_set; \
		inptr += 3; \
		continue; \
	      } \
	    else if (inptr[2] == 'B') \
	      { \
		/* JIS X 0208-1983 selected.  */ \
		set = JISX0208_1983_set; \
		inptr += 3; \
		continue; \
	      } \
	    else if (var == iso2022jp2) \
	      { \
		if (inptr[2] == 'A') \
		  { \
		    /* GB 2312-1980 selected.  */ \
		    set = GB2312_set; \
		    inptr += 3; \
		    continue; \
		  } \
		else if (inptr[2] == '(') \
		  { \
		    /* The length test above guarantees that inptr[3] \
		       is available here.  */ \
		    if (inptr[3] == 'C') \
		      { \
			/* KSC 5601-1987 selected.  */ \
			set = KSC5601_set; \
			inptr += 4; \
			continue; \
		      } \
		    else if (inptr[3] == 'D') \
		      { \
			/* JIS X 0212-1990 selected.  */ \
			set = JISX0212_set; \
			inptr += 4; \
			continue; \
		      } \
		  } \
	      } \
	  } \
	else if (var == iso2022jp2 && inptr[1] == '.') \
	  { \
	    if (inptr[2] == 'A') \
	      { \
		/* ISO 8859-1-GR selected.  */ \
		set2 = ISO88591_set; \
		inptr += 3; \
		continue; \
	      } \
	    else if (inptr[2] == 'F') \
	      { \
		/* ISO 8859-7-GR selected.  */ \
		set2 = ISO88597_set; \
		inptr += 3; \
		continue; \
	      } \
	  } \
      } \
    \
    if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N') \
      { \
	/* Single shift: the next byte selects a character from the \
	   designated G2 set.  */ \
	if (set2 == ISO88591_set) \
	  { \
	    ch = inptr[2] | 0x80; \
	    inptr += 3; \
	  } \
	else if (set2 == ISO88597_set) \
	  { \
	    /* We use the table from the ISO 8859-7 module.  It is \
	       indexed with the byte minus 0x20, so only bytes in the \
	       range 0x20..0x7f are acceptable; 0x80 itself must be \
	       rejected too.  */ \
	    if (inptr[2] < 0x20 || inptr[2] >= 0x80) \
	      { \
		result = GCONV_ILLEGAL_INPUT; \
		break; \
	      } \
	    ch = iso88597_to_ucs4[inptr[2] - 0x20]; \
	    if (ch == 0) \
	      { \
		result = GCONV_ILLEGAL_INPUT; \
		break; \
	      } \
	    inptr += 3; \
	  } \
	else \
	  { \
	    /* No G2 set has been designated.  */ \
	    result = GCONV_ILLEGAL_INPUT; \
	    break; \
	  } \
      } \
    else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f)) \
      /* Almost done, just advance the input pointer.  */ \
      ++inptr; \
    else if (set == JISX0201_Roman_set) \
      { \
	/* Use the JIS X 0201 table.  */ \
	ch = jisx0201_to_ucs4 (ch); \
	if (ch == UNKNOWN_10646_CHAR) \
	  { \
	    result = GCONV_ILLEGAL_INPUT; \
	    break; \
	  } \
	++inptr; \
      } \
    else if (set == JISX0201_Kana_set) \
      { \
	/* Use the JIS X 0201 table; the Katakana half is looked up \
	   with 0x80 added to the 7-bit byte.  */ \
	ch = jisx0201_to_ucs4 (ch + 0x80); \
	if (ch == UNKNOWN_10646_CHAR) \
	  { \
	    result = GCONV_ILLEGAL_INPUT; \
	    break; \
	  } \
	++inptr; \
      } \
    else \
      { \
	if (set == JISX0208_1978_set || set == JISX0208_1983_set) \
	  /* XXX I don't have the tables for these two old variants of \
	     JIS X 0208.  Therefore I'm using the tables for JIS X \
	     0208-1990.  If somebody has problems with this please \
	     provide the appropriate tables.  */ \
	  ch = jisx0208_to_ucs4 (&inptr, \
				 NEED_LENGTH_TEST ? inend - inptr : 2, 0); \
	else if (set == JISX0212_set) \
	  /* Use the JIS X 0212 table.  */ \
	  ch = jisx0212_to_ucs4 (&inptr, \
				 NEED_LENGTH_TEST ? inend - inptr : 2, 0); \
	else if (set == GB2312_set) \
	  /* Use the GB 2312 table.  */ \
	  ch = gb2312_to_ucs4 (&inptr, \
			       NEED_LENGTH_TEST ? inend - inptr : 2, 0); \
	else \
	  { \
	    assert (set == KSC5601_set); \
	    \
	    /* Use the KSC 5601 table.  */ \
	    ch = ksc5601_to_ucs4 (&inptr, \
				  NEED_LENGTH_TEST ? inend - inptr : 2, 0); \
	  } \
	\
	if (NEED_LENGTH_TEST && ch == 0) \
	  { \
	    result = GCONV_EMPTY_INPUT; \
	    break; \
	  } \
	else if (ch == UNKNOWN_10646_CHAR) \
	  { \
	    result = GCONV_ILLEGAL_INPUT; \
	    break; \
	  } \
      } \
    \
    /* Store the character and step over it.  This is written as two \
       statements because incrementing the result of a cast is not \
       valid C.  */ \
    *((uint32_t *) outptr) = ch; \
    outptr += sizeof (uint32_t); \
  }
/* Extra parameters of the generated loop function; they receive the
   values passed through EXTRA_LOOP_ARGS. */
#define EXTRA_LOOP_DECLS , enum variant var, int *setp
/* The state word *setp is unpacked on entry: the value below 0x100 is the
   currently selected character set, the quotient by 0x100 the G2
   designation. */
#define INIT_PARAMS int set = *setp % 0x100, set2 = *setp / 0x100
/* ... and packed again in the same layout to store it back. */
#define UPDATE_PARAMS *setp = (set2 << 8) + set
/* Instantiate the loop function named by LOOPFCT from the macros defined
   above. */
#include <iconv/loop.c>
/* Next, define the other direction.  Here the input is the UCS4 side (4
   bytes per character) and the output the ISO-2022-JP side.  The maximum
   output is MAX_NEEDED_FROM + 2, i.e. 6 bytes, which is the longest
   sequence the body below writes for one character (a four byte escape
   sequence followed by a two byte character).  The generated function is
   named TO_LOOP, i.e. to_iso2022jp_loop. */
#define MIN_NEEDED_INPUT MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT (MAX_NEEDED_FROM + 2)
#define LOOPFCT TO_LOOP
/* Loop body for the UCS4 -> ISO-2022-JP direction.  It is expanded inside
   a loop (the code uses `break') that provides `inptr', `outptr',
   `outend' and `result', together with `var', `set' and `set2' from
   EXTRA_LOOP_DECLS and INIT_PARAMS.

   One expansion reads one uint32_t from `inptr' and tries, in this order:
     1. the currently selected character set `set';
     2. the currently designated G2 set `set2', using `ESC N <byte>';
     3. a new character set, trying ASCII, JIS X 0201 Roman and
	JIS X 0208 and, for ISO-2022-JP-2 only, JIS X 0212, JIS X 0201
	Katakana, ISO 8859-1 (G2), GB 2312, KSC 5601 and ISO 8859-7 (G2);
	the matching designation escape sequence is written first.
   On success `inptr' is advanced by 4.  Otherwise `result' is set to
   GCONV_FULL_OUTPUT or GCONV_ILLEGAL_INPUT and the loop is left with
   `break'; nothing has been written and `inptr' is unchanged then.

   Every byte written after `ESC N' is reduced to 7 bits with `& 0x7f',
   because the decoding direction only accepts 7-bit bytes there.

   The ISO 8859-7 searches are skipped for code points of 0xffff and
   above, because the `end' member of struct gap is only 16 bits wide and
   the search would never stop for larger values.
   NOTE(review): this assumes the from_idx table ends with an entry whose
   `end' is 0xffff -- confirm against iso8859-7jp.h.  */
#define BODY \
  { \
    uint32_t ch; \
    size_t written = 0; \
    \
    ch = *((uint32_t *) inptr); \
    \
    /* First see whether we can write the character using the currently \
       selected character set.  */ \
    if (set == ASCII_set) \
      { \
	/* Please note that the NUL byte is *not* matched if we are not \
	   currently using the ASCII charset.  This is because we must \
	   switch to the initial state whenever a NUL byte is written.  */ \
	if (ch <= 0x7f) \
	  { \
	    *outptr++ = ch; \
	    written = 1; \
	  } \
	/* At the beginning of a line, G2 designation is cleared.  */ \
	if (var == iso2022jp2 && ch == 0x0a) \
	  set2 = UNSPECIFIED_set; \
      } \
    else if (set == JISX0201_Roman_set) \
      { \
	unsigned char buf[2]; \
	written = ucs4_to_jisx0201 (ch, buf); \
	if (written != UNKNOWN_10646_CHAR && buf[0] > 0x20 && buf[0] < 0x80) \
	  { \
	    *outptr++ = buf[0]; \
	    written = 1; \
	  } \
	else \
	  written = UNKNOWN_10646_CHAR; \
      } \
    else if (set == JISX0201_Kana_set) \
      { \
	unsigned char buf[2]; \
	written = ucs4_to_jisx0201 (ch, buf); \
	if (written != UNKNOWN_10646_CHAR && buf[0] > 0xa0 && buf[0] < 0xe0) \
	  { \
	    *outptr++ = buf[0] - 0x80; \
	    written = 1; \
	  } \
	else \
	  written = UNKNOWN_10646_CHAR; \
      } \
    else \
      { \
	if (set == JISX0208_1978_set || set == JISX0208_1983_set) \
	  written = ucs4_to_jisx0208 (ch, outptr, \
				      (NEED_LENGTH_TEST \
				       ? outend - outptr : 2)); \
	else if (set == JISX0212_set) \
	  written = ucs4_to_jisx0212 (ch, outptr, \
				      (NEED_LENGTH_TEST \
				       ? outend - outptr : 2)); \
	else if (set == GB2312_set) \
	  written = ucs4_to_gb2312 (ch, outptr, (NEED_LENGTH_TEST \
						 ? outend - outptr : 2)); \
	else \
	  { \
	    assert (set == KSC5601_set); \
	    \
	    written = ucs4_to_ksc5601 (ch, outptr, \
				       (NEED_LENGTH_TEST \
					? outend - outptr : 2)); \
	  } \
	\
	if (NEED_LENGTH_TEST && written == 0) \
	  { \
	    result = GCONV_FULL_OUTPUT; \
	    break; \
	  } \
	else if (written != UNKNOWN_10646_CHAR) \
	  outptr += written; \
      } \
    \
    if (written == UNKNOWN_10646_CHAR || written == 0) \
      { \
	/* Not representable in the current set.  Try the G2 set which \
	   is currently designated; this needs three bytes of output.  */ \
	if (set2 == ISO88591_set) \
	  { \
	    if (ch >= 0x80 && ch <= 0xff) \
	      { \
		if (NEED_LENGTH_TEST && outptr + 3 > outend) \
		  { \
		    result = GCONV_FULL_OUTPUT; \
		    break; \
		  } \
		\
		*outptr++ = ESC; \
		*outptr++ = 'N'; \
		*outptr++ = ch & 0x7f; \
		written = 3; \
	      } \
	  } \
	else if (set2 == ISO88597_set && ch < 0xffff) \
	  { \
	    const struct gap *rp = from_idx; \
	    \
	    while (ch > rp->end) \
	      ++rp; \
	    if (ch >= rp->start) \
	      { \
		unsigned char res = iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
		if (res != '\0') \
		  { \
		    if (NEED_LENGTH_TEST && outptr + 3 > outend) \
		      { \
			result = GCONV_FULL_OUTPUT; \
			break; \
		      } \
		    \
		    *outptr++ = ESC; \
		    *outptr++ = 'N'; \
		    *outptr++ = res & 0x7f; \
		    written = 3; \
		  } \
	      } \
	  } \
      } \
    \
    if (written == UNKNOWN_10646_CHAR || written == 0) \
      { \
	/* Either this is an unknown character or we have to switch \
	   the currently selected character set.  The character sets \
	   do not code entirely separate parts of ISO 10646 and \
	   therefore there is no single correct result.  If we choose \
	   the character set to use wrong we might end up with using \
	   yet another character set for the next character though the \
	   current and the next could be encoded with one character \
	   set.  We leave this kind of optimization for later and now \
	   simply use a fixed order in which we test for \
	   availability.  */ \
	\
	if (ch <= 0x7f) \
	  { \
	    /* We must encode using ASCII.  First write out the \
	       escape sequence.  */ \
	    if (NEED_LENGTH_TEST && outptr + 4 > outend) \
	      { \
		result = GCONV_FULL_OUTPUT; \
		break; \
	      } \
	    \
	    *outptr++ = ESC; \
	    *outptr++ = '('; \
	    *outptr++ = 'B'; \
	    set = ASCII_set; \
	    *outptr++ = ch; \
	    \
	    /* At the beginning of a line, G2 designation is cleared.  */ \
	    if (var == iso2022jp2 && ch == 0x0a) \
	      set2 = UNSPECIFIED_set; \
	  } \
	else \
	  { \
	    /* Now it becomes difficult.  We must search the other \
	       character sets one by one and we cannot use simple \
	       arithmetic to determine whether the character can be \
	       encoded using this set.  */ \
	    unsigned char buf[2]; \
	    \
	    written = ucs4_to_jisx0201 (ch, buf); \
	    if (written != UNKNOWN_10646_CHAR && buf[0] < 0x80) \
	      { \
		/* We use JIS X 0201 Roman.  */ \
		if (NEED_LENGTH_TEST && outptr + 4 > outend) \
		  { \
		    result = GCONV_FULL_OUTPUT; \
		    break; \
		  } \
		\
		*outptr++ = ESC; \
		*outptr++ = '('; \
		*outptr++ = 'J'; \
		set = JISX0201_Roman_set; \
		*outptr++ = buf[0]; \
	      } \
	    else \
	      { \
		written = ucs4_to_jisx0208 (ch, buf, 2); \
		if (written != UNKNOWN_10646_CHAR) \
		  { \
		    /* We use JIS X 0208.  */ \
		    if (NEED_LENGTH_TEST && outptr + 5 > outend) \
		      { \
			result = GCONV_FULL_OUTPUT; \
			break; \
		      } \
		    \
		    *outptr++ = ESC; \
		    *outptr++ = '$'; \
		    *outptr++ = 'B'; \
		    set = JISX0208_1983_set; \
		    *outptr++ = buf[0]; \
		    *outptr++ = buf[1]; \
		  } \
		else if (var == iso2022jp) \
		  { \
		    /* We have no other choice.  */ \
		    result = GCONV_ILLEGAL_INPUT; \
		    break; \
		  } \
		else \
		  { \
		    written = ucs4_to_jisx0212 (ch, buf, 2); \
		    if (written != UNKNOWN_10646_CHAR) \
		      { \
			/* We use JIS X 0212.  */ \
			if (NEED_LENGTH_TEST && outptr + 6 > outend) \
			  { \
			    result = GCONV_FULL_OUTPUT; \
			    break; \
			  } \
			*outptr++ = ESC; \
			*outptr++ = '$'; \
			*outptr++ = '('; \
			*outptr++ = 'D'; \
			set = JISX0212_set; \
			*outptr++ = buf[0]; \
			*outptr++ = buf[1]; \
		      } \
		    else \
		      { \
			written = ucs4_to_jisx0201 (ch, buf); \
			if (written != UNKNOWN_10646_CHAR && buf[0] >= 0x80) \
			  { \
			    /* We use JIS X 0201 Katakana.  */ \
			    if (NEED_LENGTH_TEST && outptr + 4 > outend) \
			      { \
				result = GCONV_FULL_OUTPUT; \
				break; \
			      } \
			    \
			    *outptr++ = ESC; \
			    *outptr++ = '('; \
			    *outptr++ = 'I'; \
			    set = JISX0201_Kana_set; \
			    *outptr++ = buf[0] - 0x80; \
			  } \
			else if (ch != 0xa5 && ch >= 0x80 && ch <= 0xff) \
			  { \
			    /* ISO 8859-1 upper half.  */ \
			    if (NEED_LENGTH_TEST && outptr + 6 > outend) \
			      { \
				result = GCONV_FULL_OUTPUT; \
				break; \
			      } \
			    \
			    *outptr++ = ESC; \
			    *outptr++ = '.'; \
			    *outptr++ = 'A'; \
			    set2 = ISO88591_set; \
			    *outptr++ = ESC; \
			    *outptr++ = 'N'; \
			    *outptr++ = ch & 0x7f; \
			  } \
			else \
			  { \
			    written = ucs4_to_gb2312 (ch, buf, 2); \
			    if (written != UNKNOWN_10646_CHAR) \
			      { \
				/* We use GB 2312.  */ \
				if (NEED_LENGTH_TEST && outptr + 5 > outend) \
				  { \
				    result = GCONV_FULL_OUTPUT; \
				    break; \
				  } \
				\
				*outptr++ = ESC; \
				*outptr++ = '$'; \
				*outptr++ = 'A'; \
				set = GB2312_set; \
				*outptr++ = buf[0]; \
				*outptr++ = buf[1]; \
			      } \
			    else \
			      { \
				written = ucs4_to_ksc5601 (ch, buf, 2); \
				if (written != UNKNOWN_10646_CHAR) \
				  { \
				    /* We use KSC 5601.  */ \
				    if (NEED_LENGTH_TEST \
					&& outptr + 6 > outend) \
				      { \
					result = GCONV_FULL_OUTPUT; \
					break; \
				      } \
				    *outptr++ = ESC; \
				    *outptr++ = '$'; \
				    *outptr++ = '('; \
				    *outptr++ = 'C'; \
				    set = KSC5601_set; \
				    *outptr++ = buf[0]; \
				    *outptr++ = buf[1]; \
				  } \
				else \
				  { \
				    unsigned char gch = 0; \
				    \
				    if (ch < 0xffff) \
				      { \
					const struct gap *rp = from_idx; \
					\
					while (ch > rp->end) \
					  ++rp; \
					if (ch >= rp->start) \
					  gch = iso88597_from_ucs4[ch - 0xa0 \
								   + rp->idx]; \
				      } \
				    \
				    if (gch != 0) \
				      { \
					/* We use ISO 8859-7 greek.  */ \
					if (NEED_LENGTH_TEST \
					    && outptr + 6 > outend) \
					  { \
					    result = GCONV_FULL_OUTPUT; \
					    break; \
					  } \
					*outptr++ = ESC; \
					*outptr++ = '.'; \
					*outptr++ = 'F'; \
					set2 = ISO88597_set; \
					*outptr++ = ESC; \
					*outptr++ = 'N'; \
					*outptr++ = gch & 0x7f; \
				      } \
				    else \
				      { \
					result = GCONV_ILLEGAL_INPUT; \
					break; \
				      } \
				  } \
			      } \
			  } \
		      } \
		  } \
	      } \
	  } \
      } \
    \
    /* Now that we wrote the output increment the input pointer.  */ \
    inptr += 4; \
  }
/* Extra parameters of the generated loop function; they receive the
   values passed through EXTRA_LOOP_ARGS. */
#define EXTRA_LOOP_DECLS , enum variant var, int *setp
/* The state word *setp is unpacked on entry: the value below 0x100 is the
   currently selected character set, the quotient by 0x100 the G2
   designation. */
#define INIT_PARAMS int set = *setp % 0x100, set2 = *setp / 0x100
/* ... and packed again in the same layout to store it back. */
#define UPDATE_PARAMS *setp = (set2 << 8) + set
/* Instantiate the loop function named by LOOPFCT from the macros defined
   above. */
#include <iconv/loop.c>
/* Now define the toplevel functions. */
#include <iconv/skeleton.c>