/* Source file: glibc/iconvdata/unicode.c (191 lines, 5.1 KiB, C).  */

/* Conversion module for Unicode
Copyright (C) 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <byteswap.h>
#include <gconv.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/* This is the Byte Order Mark character (BOM). */
#define BOM 0xfeff
/* And in the other endian format: the value a 16-bit read yields when the
   two bytes of a BOM arrive in the opposite order.  PREPARE_LOOP treats it
   as the signal that the input must be byte-swapped. */
#define BOM_OE 0xfffe
/* Definitions used in the body of the `gconv' function. */
/* Names given to the two conversion loops defined further down: FROM_LOOP
   is used as LOOPFCT for the UCS2 -> internal loop, TO_LOOP for the
   internal -> UCS2 loop. */
#define FROM_LOOP from_unicode_loop
#define TO_LOOP to_unicode_loop
/* NOTE(review): presumably 0 tells iconv/skeleton.c not to generate its own
   init/fini functions, since gconv_init and gconv_end are written by hand
   in this file -- confirm against the skeleton. */
#define DEFINE_INIT 0
#define DEFINE_FINI 0
/* Bytes per character on the UNICODE (UCS2) side and on the internal
   (UCS4-like) side; the loop bodies advance their pointers by exactly
   2 and 4 bytes respectively. */
#define MIN_NEEDED_FROM 2
#define MIN_NEEDED_TO 4
/* True when this step converts from UNICODE to the internal format.  `dir'
   is the local variable that PREPARE_LOOP declares. */
#define FROM_DIRECTION (dir == from_unicode)
/* Per-call setup executed before the conversion loops run.

   It declares two locals for the rest of the function: `dir' (the
   direction stored in the step's private data) and `swap' (whether 16-bit
   input units must be byte-swapped).

   Converting from UNICODE, on the first invocation only: at least two input
   bytes are required, otherwise __GCONV_EMPTY_INPUT is returned.  A leading
   BOM is skipped; a leading byte-swapped BOM (BOM_OE) is skipped as well and
   additionally recorded as swap = 1 in the step data, so that later
   invocations still see it.  Input without a BOM is left untouched.

   Converting to UNICODE, on the first invocation and unless the conversion
   is marked as internal use: a BOM is written to the output, or
   __GCONV_FULL_OUTPUT is returned when fewer than two bytes are free.

   Every continuation line must end in a backslash; any text inserted
   between them terminates the macro early.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct unicode_data *) step->__data)->dir; \
  int swap; \
  if (FROM_DIRECTION) \
    { \
      if (data->__invocation_counter == 0) \
        { \
          /* We have to find out which byte order the file is encoded in.  */ \
          if (inptr + 2 > inend) \
            return __GCONV_EMPTY_INPUT; \
 \
          if (get16u (inptr) == BOM) \
            /* Simply ignore the BOM character.  */ \
            inptr += 2; \
          else if (get16u (inptr) == BOM_OE) \
            { \
              /* Remember the foreign byte order for all later calls.  */ \
              ((struct unicode_data *) step->__data)->swap = 1; \
              inptr += 2; \
            } \
        } \
    } \
  else if (!data->__internal_use && data->__invocation_counter == 0) \
    { \
      /* Emit the Byte Order Mark.  */ \
      if (outbuf + 2 > outend) \
        return __GCONV_FULL_OUTPUT; \
 \
      put16u (outbuf, BOM); \
      outbuf += 2; \
    } \
  swap = ((struct unicode_data *) step->__data)->swap;
/* Extra arguments handed to the loop functions; they line up with the
   parameters declared by EXTRA_LOOP_DECLS further down.  */
#define EXTRA_LOOP_ARGS , data, swap
/* Direction of the transformation. */
enum direction
{
illegal_dir,
to_unicode,
from_unicode
};
/* Private per-step state: allocated by gconv_init, kept in step->__data,
   and released by gconv_end. */
struct unicode_data
{
/* Which way this step converts. */
enum direction dir;
/* Starts at 0; set to 1 by PREPARE_LOOP once a byte-swapped BOM (BOM_OE)
   has been seen at the start of the input.  When nonzero, the
   UCS2 -> internal loop byte-swaps every 16-bit unit it reads. */
int swap;
};
/* Initialise a conversion step for the UNICODE module.

   The direction is from_unicode when the step's source charset name is
   "UNICODE" (compared case-insensitively) and to_unicode otherwise.  A
   struct unicode_data holding that direction, with swap cleared, is
   allocated and attached to STEP, and the step's minimum/maximum byte
   counts per character are filled in for the chosen direction.

   Returns __GCONV_OK on success.  Returns __GCONV_NOMEM when the
   allocation fails; STEP is left unmodified in that case.  */
int
gconv_init (struct __gconv_step *step)
{
  struct unicode_data *state;
  int source_is_unicode;
  int from_width;
  int to_width;

  /* Determine which direction.  */
  source_is_unicode = __strcasecmp (step->__from_name, "UNICODE") == 0;

  state = malloc (sizeof *state);
  if (state == NULL)
    return __GCONV_NOMEM;

  state->dir = source_is_unicode ? from_unicode : to_unicode;
  state->swap = 0;
  step->__data = state;

  /* Both encodings are fixed width, so minimum and maximum coincide; which
     width is the "from" side depends on the direction.  */
  if (source_is_unicode)
    {
      from_width = MIN_NEEDED_FROM;
      to_width = MIN_NEEDED_TO;
    }
  else
    {
      from_width = MIN_NEEDED_TO;
      to_width = MIN_NEEDED_FROM;
    }

  step->__min_needed_from = from_width;
  step->__max_needed_from = from_width;
  step->__min_needed_to = to_width;
  step->__max_needed_to = to_width;

  step->__stateful = 0;

  return __GCONV_OK;
}
/* Tear down a conversion step: release the private state that gconv_init
   stored in the step's __data member.  */
void
gconv_end (struct __gconv_step *data)
{
  void *state = data->__data;

  free (state);
}
/* Convert from the internal (UCS4-like) format to UCS2. */
#define MIN_NEEDED_INPUT MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
#define LOOPFCT TO_LOOP
#define BODY \
{ \
Update. 2000-03-28 Ulrich Drepper <drepper@redhat.com> * iconvdata/TESTS: Use UCS-2BE instead of UCS2. * iconv/loop.c: Define get16, get32, put16, and put32 macros to allow as well reading from/writing to unaligned addresses on machines which don't support this in hardware. Use FCTNAME macro to define function name. Include the file a second time for platforms which need special unaligned handling. * iconv/skeleton.c: Define get16u, get32u, put16u, and put32u macros to access potentially unaligned addresses. These macros are intended to be used only outside the loops. (unaligned): New definition. In case the machine can handle unaligned access define as zero. Otherwise as a variable which is initialized as nonzero in case the buffer passed in at runtime is unaligned with respect to the character set encoding involved. Call aligned or unaligned looop functions according to unaligned variable. * iconvdata/8bit-gap.c: Use get16, get32, put16, and put32 instead of direct casting pointer to potentially handle unaligned memory accesses. * iconvdata/8bit-generic.c: Likewise. * iconvdata/ansi_x3.110.c: Likewise. * iconvdata/big5.c: Likewise. * iconvdata/euc-cn.c: Likewise. * iconvdata/euc-jp.c: Likewise. * iconvdata/euc-kr.c: Likewise. * iconvdata/euc-tw.c: Likewise. * iconvdata/gbk.c: Likewise. * iconvdata/iso-2022-cn.c: Likewise. * iconvdata/iso-2022-jp.c: Likewise. * iconvdata/iso-2022-kr.c: Likewise. * iconvdata/iso646.c: Likewise. * iconvdata/iso_6937-2.c: Likewise. * iconvdata/iso_6937.c: Likewise. * iconvdata/johab.c: Likewise. * iconvdata/sjis.c: Likewise. * iconvdata/t.61.c: Likewise. * iconvdata/uhc.c: Likewise. * iconvdata/unicode.c: Likewise. * iconvdata/utf-16.c: Likewise. * locale/programs/simple-hash.c: Little optimizations. Remove K&R prototypes. * malloc/Versions [libc] (GLIBC_2.2): Add mcheck_check_all. * malloc/mcheck.c (mcheck_check_all): Renamed from check_all and made public. * malloc/mcheck.h (mcheck_check_all): Declare. 
* stdio-common/Makefile (tests): Add tst-obprintf.
2000-03-28 17:33:37 +00:00
uint32_t c = get32 (inptr); \
\
if (c >= 0x10000) \
{ \
result = __GCONV_ILLEGAL_INPUT; \
break; \
} \
\
Update. 2000-03-28 Ulrich Drepper <drepper@redhat.com> * iconvdata/TESTS: Use UCS-2BE instead of UCS2. * iconv/loop.c: Define get16, get32, put16, and put32 macros to allow as well reading from/writing to unaligned addresses on machines which don't support this in hardware. Use FCTNAME macro to define function name. Include the file a second time for platforms which need special unaligned handling. * iconv/skeleton.c: Define get16u, get32u, put16u, and put32u macros to access potentially unaligned addresses. These macros are intended to be used only outside the loops. (unaligned): New definition. In case the machine can handle unaligned access define as zero. Otherwise as a variable which is initialized as nonzero in case the buffer passed in at runtime is unaligned with respect to the character set encoding involved. Call aligned or unaligned looop functions according to unaligned variable. * iconvdata/8bit-gap.c: Use get16, get32, put16, and put32 instead of direct casting pointer to potentially handle unaligned memory accesses. * iconvdata/8bit-generic.c: Likewise. * iconvdata/ansi_x3.110.c: Likewise. * iconvdata/big5.c: Likewise. * iconvdata/euc-cn.c: Likewise. * iconvdata/euc-jp.c: Likewise. * iconvdata/euc-kr.c: Likewise. * iconvdata/euc-tw.c: Likewise. * iconvdata/gbk.c: Likewise. * iconvdata/iso-2022-cn.c: Likewise. * iconvdata/iso-2022-jp.c: Likewise. * iconvdata/iso-2022-kr.c: Likewise. * iconvdata/iso646.c: Likewise. * iconvdata/iso_6937-2.c: Likewise. * iconvdata/iso_6937.c: Likewise. * iconvdata/johab.c: Likewise. * iconvdata/sjis.c: Likewise. * iconvdata/t.61.c: Likewise. * iconvdata/uhc.c: Likewise. * iconvdata/unicode.c: Likewise. * iconvdata/utf-16.c: Likewise. * locale/programs/simple-hash.c: Little optimizations. Remove K&R prototypes. * malloc/Versions [libc] (GLIBC_2.2): Add mcheck_check_all. * malloc/mcheck.c (mcheck_check_all): Renamed from check_all and made public. * malloc/mcheck.h (mcheck_check_all): Declare. 
* stdio-common/Makefile (tests): Add tst-obprintf.
2000-03-28 17:33:37 +00:00
put16 (outptr, c); \
\
outptr += 2; \
inptr += 4; \
}
#define EXTRA_LOOP_DECLS \
, struct __gconv_step_data *step_data, int swap
#include <iconv/loop.c>
/* Convert from UCS2 to the internal (UCS4-like) format. */
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
#define LOOPFCT FROM_LOOP
#define BODY \
{ \
Update. 2000-03-28 Ulrich Drepper <drepper@redhat.com> * iconvdata/TESTS: Use UCS-2BE instead of UCS2. * iconv/loop.c: Define get16, get32, put16, and put32 macros to allow as well reading from/writing to unaligned addresses on machines which don't support this in hardware. Use FCTNAME macro to define function name. Include the file a second time for platforms which need special unaligned handling. * iconv/skeleton.c: Define get16u, get32u, put16u, and put32u macros to access potentially unaligned addresses. These macros are intended to be used only outside the loops. (unaligned): New definition. In case the machine can handle unaligned access define as zero. Otherwise as a variable which is initialized as nonzero in case the buffer passed in at runtime is unaligned with respect to the character set encoding involved. Call aligned or unaligned looop functions according to unaligned variable. * iconvdata/8bit-gap.c: Use get16, get32, put16, and put32 instead of direct casting pointer to potentially handle unaligned memory accesses. * iconvdata/8bit-generic.c: Likewise. * iconvdata/ansi_x3.110.c: Likewise. * iconvdata/big5.c: Likewise. * iconvdata/euc-cn.c: Likewise. * iconvdata/euc-jp.c: Likewise. * iconvdata/euc-kr.c: Likewise. * iconvdata/euc-tw.c: Likewise. * iconvdata/gbk.c: Likewise. * iconvdata/iso-2022-cn.c: Likewise. * iconvdata/iso-2022-jp.c: Likewise. * iconvdata/iso-2022-kr.c: Likewise. * iconvdata/iso646.c: Likewise. * iconvdata/iso_6937-2.c: Likewise. * iconvdata/iso_6937.c: Likewise. * iconvdata/johab.c: Likewise. * iconvdata/sjis.c: Likewise. * iconvdata/t.61.c: Likewise. * iconvdata/uhc.c: Likewise. * iconvdata/unicode.c: Likewise. * iconvdata/utf-16.c: Likewise. * locale/programs/simple-hash.c: Little optimizations. Remove K&R prototypes. * malloc/Versions [libc] (GLIBC_2.2): Add mcheck_check_all. * malloc/mcheck.c (mcheck_check_all): Renamed from check_all and made public. * malloc/mcheck.h (mcheck_check_all): Declare. 
* stdio-common/Makefile (tests): Add tst-obprintf.
2000-03-28 17:33:37 +00:00
uint16_t u1 = get16 (inptr); \
\
if (swap) \
u1 = bswap_16 (u1); \
\
Update. 2000-03-28 Ulrich Drepper <drepper@redhat.com> * iconvdata/TESTS: Use UCS-2BE instead of UCS2. * iconv/loop.c: Define get16, get32, put16, and put32 macros to allow as well reading from/writing to unaligned addresses on machines which don't support this in hardware. Use FCTNAME macro to define function name. Include the file a second time for platforms which need special unaligned handling. * iconv/skeleton.c: Define get16u, get32u, put16u, and put32u macros to access potentially unaligned addresses. These macros are intended to be used only outside the loops. (unaligned): New definition. In case the machine can handle unaligned access define as zero. Otherwise as a variable which is initialized as nonzero in case the buffer passed in at runtime is unaligned with respect to the character set encoding involved. Call aligned or unaligned looop functions according to unaligned variable. * iconvdata/8bit-gap.c: Use get16, get32, put16, and put32 instead of direct casting pointer to potentially handle unaligned memory accesses. * iconvdata/8bit-generic.c: Likewise. * iconvdata/ansi_x3.110.c: Likewise. * iconvdata/big5.c: Likewise. * iconvdata/euc-cn.c: Likewise. * iconvdata/euc-jp.c: Likewise. * iconvdata/euc-kr.c: Likewise. * iconvdata/euc-tw.c: Likewise. * iconvdata/gbk.c: Likewise. * iconvdata/iso-2022-cn.c: Likewise. * iconvdata/iso-2022-jp.c: Likewise. * iconvdata/iso-2022-kr.c: Likewise. * iconvdata/iso646.c: Likewise. * iconvdata/iso_6937-2.c: Likewise. * iconvdata/iso_6937.c: Likewise. * iconvdata/johab.c: Likewise. * iconvdata/sjis.c: Likewise. * iconvdata/t.61.c: Likewise. * iconvdata/uhc.c: Likewise. * iconvdata/unicode.c: Likewise. * iconvdata/utf-16.c: Likewise. * locale/programs/simple-hash.c: Little optimizations. Remove K&R prototypes. * malloc/Versions [libc] (GLIBC_2.2): Add mcheck_check_all. * malloc/mcheck.c (mcheck_check_all): Renamed from check_all and made public. * malloc/mcheck.h (mcheck_check_all): Declare. 
* stdio-common/Makefile (tests): Add tst-obprintf.
2000-03-28 17:33:37 +00:00
put32 (outptr, u1); \
\
inptr += 2; \
outptr += 4; \
}
#define EXTRA_LOOP_DECLS \
, struct __gconv_step_data *step_data, int swap
#include <iconv/loop.c>
/* Now define the toplevel functions. */
#include <iconv/skeleton.c>