S/390: Hardware iconv modules.

This commit is contained in:
Andreas Krebbel 2009-07-24 08:29:06 -07:00 committed by Ulrich Drepper
parent 01b597da40
commit f957edded8
8 changed files with 1628 additions and 8 deletions

View File

@ -1,3 +1,19 @@
2009-06-16 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
* sysdeps/s390/dl-procinfo.c (_dl_s390_cap_flags): "hpage",
"etf3enh" and "highgprs" added.
(_dl_s390_platforms): "z10" added.
* sysdeps/s390/dl-procinfo.h (_DL_HWCAP_COUNT, _DL_PLATFORMS_COUNT):
Increased for the new entries.
(HWCAP enum): HWCAP_S390_HPAGE, HWCAP_S390_ETF3EH and
HWCAP_S390_HIGH_GPRS added.
* sysdeps/s390/s390-64/Makefile: Adjusted to build the new modules.
* sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c: New file.
* sysdeps/s390/s390-64/utf16-utf32-z9.c: New file.
* sysdeps/s390/s390-64/utf8-utf16-z9.c: New file.
* sysdeps/s390/s390-64/utf8-utf32-z9.c: New file.
2009-07-23 Ulrich Drepper <drepper@redhat.com>
* sysdeps/x86_64/cacheinfo.c [USE_MULTIARCH]: Rearrange code to

View File

@ -1,5 +1,5 @@
/* Data for s390 version of processor capability information.
Copyright (C) 2006 Free Software Foundation, Inc.
Copyright (C) 2006, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2006.
@ -47,11 +47,11 @@
#if !defined PROCINFO_DECL && defined SHARED
._dl_s390_cap_flags
#else
PROCINFO_CLASS const char _dl_s390_cap_flags[7][6]
PROCINFO_CLASS const char _dl_s390_cap_flags[10][8]
#endif
#ifndef PROCINFO_DECL
= {
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp"
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "hpage", "etf3enh", "highgprs"
}
#endif
#if !defined SHARED || defined PROCINFO_DECL
@ -63,11 +63,11 @@ PROCINFO_CLASS const char _dl_s390_cap_flags[7][6]
#if !defined PROCINFO_DECL && defined SHARED
._dl_s390_platforms
#else
PROCINFO_CLASS const char _dl_s390_platforms[4][7]
PROCINFO_CLASS const char _dl_s390_platforms[5][7]
#endif
#ifndef PROCINFO_DECL
= {
"g5", "z900", "z990", "z9-109"
"g5", "z900", "z990", "z9-109", "z10"
}
#endif
#if !defined SHARED || defined PROCINFO_DECL

View File

@ -1,5 +1,5 @@
/* s390 version of processor capability information handling macros.
Copyright (C) 2006 Free Software Foundation, Inc.
Copyright (C) 2006, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2006.
@ -22,9 +22,9 @@
#define _DL_PROCINFO_H 1
#include <ldsodefs.h>
#define _DL_HWCAP_COUNT 7
#define _DL_HWCAP_COUNT 10
#define _DL_PLATFORMS_COUNT 4
#define _DL_PLATFORMS_COUNT 5
/* The kernel provides up to 32 capability bits with elf_hwcap. */
#define _DL_FIRST_PLATFORM 32
@ -45,6 +45,9 @@ enum
HWCAP_S390_LDISP = 1 << 4,
HWCAP_S390_EIMM = 1 << 5,
HWCAP_S390_DFP = 1 << 6,
HWCAP_S390_HPAGE = 1 << 7,
HWCAP_S390_ETF3EH = 1 << 8,
HWCAP_S390_HIGH_GPRS = 1 << 9,
};
#define HWCAP_IMPORTANT (HWCAP_S390_ZARCH | HWCAP_S390_LDISP \

View File

@ -9,3 +9,70 @@ CFLAGS-rtld.c += -Wno-uninitialized -Wno-unused
CFLAGS-dl-load.c += -Wno-unused
CFLAGS-dl-reloc.c += -Wno-unused
endif
ifeq ($(subdir),iconvdata)
# S/390 hardware accelerated gconv modules.  Every module is built from a
# single source file and uses gconv.map as its map file.
ISO-8859-1_CP037_Z900-routines := iso-8859-1_cp037_z900
ISO-8859-1_CP037_Z900-map := gconv.map

UTF8_UTF32_Z9-routines := utf8-utf32-z9
UTF8_UTF32_Z9-map := gconv.map

UTF16_UTF32_Z9-routines := utf16-utf32-z9
UTF16_UTF32_Z9-map := gconv.map

UTF8_UTF16_Z9-routines := utf8-utf16-z9
UTF8_UTF16_Z9-map := gconv.map

s390x-iconv-modules = ISO-8859-1_CP037_Z900 UTF8_UTF16_Z9 UTF16_UTF32_Z9 UTF8_UTF32_Z9

extra-modules-left += $(s390x-iconv-modules)
include extra-module.mk

extra-objs += $(addsuffix .so, $(s390x-iconv-modules))
install-others += $(patsubst %, $(inst_gconvdir)/%.so, $(s390x-iconv-modules))

distribute += iso-8859-1_cp037_z900.c utf8-utf32-z9.c utf16-utf32-z9.c utf8-utf16-z9.c

# Install each module's shared object into the gconv directory.
$(patsubst %, $(inst_gconvdir)/%.so, $(s390x-iconv-modules)) : \
$(inst_gconvdir)/%.so: $(objpfx)%.so $(+force)
	$(do-install-program)

# Generate the s390 variant of gconv-modules: the generic file followed by
# one "module FROM TO FILENAME 1" line per hardware accelerated conversion.
# printf reuses its format for every group of three operands, which avoids
# the non-portable `echo -n'.  The file is assembled under a temporary name
# and renamed into place so that an interrupted build never leaves a
# truncated target behind.
$(objpfx)gconv-modules-s390: gconv-modules $(+force)
	{ cat $< && \
	  echo && \
	  echo "# S/390 hardware accelerated modules" && \
	  printf 'module\t%s\t%s\t%s\t1\n' \
	    ISO-8859-1// IBM037// ISO-8859-1_CP037_Z900 \
	    IBM037// ISO-8859-1// ISO-8859-1_CP037_Z900 \
	    ISO-10646/UTF8/ UTF-32// UTF8_UTF32_Z9 \
	    UTF-32BE// ISO-10646/UTF8/ UTF8_UTF32_Z9 \
	    ISO-10646/UTF8/ UTF-32BE// UTF8_UTF32_Z9 \
	    UTF-16BE// UTF-32// UTF16_UTF32_Z9 \
	    UTF-32BE// UTF-16// UTF16_UTF32_Z9 \
	    INTERNAL UTF-16// UTF16_UTF32_Z9 \
	    UTF-32BE// UTF-16BE// UTF16_UTF32_Z9 \
	    INTERNAL UTF-16BE// UTF16_UTF32_Z9 \
	    UTF-16BE// UTF-32BE// UTF16_UTF32_Z9 \
	    UTF-16BE// INTERNAL UTF16_UTF32_Z9 \
	    UTF-16BE// ISO-10646/UTF8/ UTF8_UTF16_Z9 \
	    ISO-10646/UTF8/ UTF-16// UTF8_UTF16_Z9 \
	    ISO-10646/UTF8/ UTF-16BE// UTF8_UTF16_Z9; \
	} > $@T
	mv -f $@T $@

# The installed gconv-modules is the s390 variant generated above.
$(inst_gconvdir)/gconv-modules: $(objpfx)gconv-modules-s390 $(+force)
	$(do-install)
endif

View File

@ -0,0 +1,238 @@
/* Conversion between ISO 8859-1 and IBM037.
This module uses the Z900 variant of the Translate One To One
instruction.
Copyright (C) 1997-2009 Free Software Foundation, Inc.
Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
Thanks to Daniel Appich who covered the relevant performance work
in his diploma thesis.
This is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <dlfcn.h>
#include <stdint.h>
// conversion table from ISO-8859-1 to IBM037
static const unsigned char table_iso8859_1_to_cp037[256]
__attribute__ ((aligned (8))) =
{
[0x00] = 0x00, [0x01] = 0x01, [0x02] = 0x02, [0x03] = 0x03,
[0x04] = 0x37, [0x05] = 0x2D, [0x06] = 0x2E, [0x07] = 0x2F,
[0x08] = 0x16, [0x09] = 0x05, [0x0A] = 0x25, [0x0B] = 0x0B,
[0x0C] = 0x0C, [0x0D] = 0x0D, [0x0E] = 0x0E, [0x0F] = 0x0F,
[0x10] = 0x10, [0x11] = 0x11, [0x12] = 0x12, [0x13] = 0x13,
[0x14] = 0x3C, [0x15] = 0x3D, [0x16] = 0x32, [0x17] = 0x26,
[0x18] = 0x18, [0x19] = 0x19, [0x1A] = 0x3F, [0x1B] = 0x27,
[0x1C] = 0x1C, [0x1D] = 0x1D, [0x1E] = 0x1E, [0x1F] = 0x1F,
[0x20] = 0x40, [0x21] = 0x5A, [0x22] = 0x7F, [0x23] = 0x7B,
[0x24] = 0x5B, [0x25] = 0x6C, [0x26] = 0x50, [0x27] = 0x7D,
[0x28] = 0x4D, [0x29] = 0x5D, [0x2A] = 0x5C, [0x2B] = 0x4E,
[0x2C] = 0x6B, [0x2D] = 0x60, [0x2E] = 0x4B, [0x2F] = 0x61,
[0x30] = 0xF0, [0x31] = 0xF1, [0x32] = 0xF2, [0x33] = 0xF3,
[0x34] = 0xF4, [0x35] = 0xF5, [0x36] = 0xF6, [0x37] = 0xF7,
[0x38] = 0xF8, [0x39] = 0xF9, [0x3A] = 0x7A, [0x3B] = 0x5E,
[0x3C] = 0x4C, [0x3D] = 0x7E, [0x3E] = 0x6E, [0x3F] = 0x6F,
[0x40] = 0x7C, [0x41] = 0xC1, [0x42] = 0xC2, [0x43] = 0xC3,
[0x44] = 0xC4, [0x45] = 0xC5, [0x46] = 0xC6, [0x47] = 0xC7,
[0x48] = 0xC8, [0x49] = 0xC9, [0x4A] = 0xD1, [0x4B] = 0xD2,
[0x4C] = 0xD3, [0x4D] = 0xD4, [0x4E] = 0xD5, [0x4F] = 0xD6,
[0x50] = 0xD7, [0x51] = 0xD8, [0x52] = 0xD9, [0x53] = 0xE2,
[0x54] = 0xE3, [0x55] = 0xE4, [0x56] = 0xE5, [0x57] = 0xE6,
[0x58] = 0xE7, [0x59] = 0xE8, [0x5A] = 0xE9, [0x5B] = 0xBA,
[0x5C] = 0xE0, [0x5D] = 0xBB, [0x5E] = 0xB0, [0x5F] = 0x6D,
[0x60] = 0x79, [0x61] = 0x81, [0x62] = 0x82, [0x63] = 0x83,
[0x64] = 0x84, [0x65] = 0x85, [0x66] = 0x86, [0x67] = 0x87,
[0x68] = 0x88, [0x69] = 0x89, [0x6A] = 0x91, [0x6B] = 0x92,
[0x6C] = 0x93, [0x6D] = 0x94, [0x6E] = 0x95, [0x6F] = 0x96,
[0x70] = 0x97, [0x71] = 0x98, [0x72] = 0x99, [0x73] = 0xA2,
[0x74] = 0xA3, [0x75] = 0xA4, [0x76] = 0xA5, [0x77] = 0xA6,
[0x78] = 0xA7, [0x79] = 0xA8, [0x7A] = 0xA9, [0x7B] = 0xC0,
[0x7C] = 0x4F, [0x7D] = 0xD0, [0x7E] = 0xA1, [0x7F] = 0x07,
[0x80] = 0x20, [0x81] = 0x21, [0x82] = 0x22, [0x83] = 0x23,
[0x84] = 0x24, [0x85] = 0x15, [0x86] = 0x06, [0x87] = 0x17,
[0x88] = 0x28, [0x89] = 0x29, [0x8A] = 0x2A, [0x8B] = 0x2B,
[0x8C] = 0x2C, [0x8D] = 0x09, [0x8E] = 0x0A, [0x8F] = 0x1B,
[0x90] = 0x30, [0x91] = 0x31, [0x92] = 0x1A, [0x93] = 0x33,
[0x94] = 0x34, [0x95] = 0x35, [0x96] = 0x36, [0x97] = 0x08,
[0x98] = 0x38, [0x99] = 0x39, [0x9A] = 0x3A, [0x9B] = 0x3B,
[0x9C] = 0x04, [0x9D] = 0x14, [0x9E] = 0x3E, [0x9F] = 0xFF,
[0xA0] = 0x41, [0xA1] = 0xAA, [0xA2] = 0x4A, [0xA3] = 0xB1,
[0xA4] = 0x9F, [0xA5] = 0xB2, [0xA6] = 0x6A, [0xA7] = 0xB5,
[0xA8] = 0xBD, [0xA9] = 0xB4, [0xAA] = 0x9A, [0xAB] = 0x8A,
[0xAC] = 0x5F, [0xAD] = 0xCA, [0xAE] = 0xAF, [0xAF] = 0xBC,
[0xB0] = 0x90, [0xB1] = 0x8F, [0xB2] = 0xEA, [0xB3] = 0xFA,
[0xB4] = 0xBE, [0xB5] = 0xA0, [0xB6] = 0xB6, [0xB7] = 0xB3,
[0xB8] = 0x9D, [0xB9] = 0xDA, [0xBA] = 0x9B, [0xBB] = 0x8B,
[0xBC] = 0xB7, [0xBD] = 0xB8, [0xBE] = 0xB9, [0xBF] = 0xAB,
[0xC0] = 0x64, [0xC1] = 0x65, [0xC2] = 0x62, [0xC3] = 0x66,
[0xC4] = 0x63, [0xC5] = 0x67, [0xC6] = 0x9E, [0xC7] = 0x68,
[0xC8] = 0x74, [0xC9] = 0x71, [0xCA] = 0x72, [0xCB] = 0x73,
[0xCC] = 0x78, [0xCD] = 0x75, [0xCE] = 0x76, [0xCF] = 0x77,
[0xD0] = 0xAC, [0xD1] = 0x69, [0xD2] = 0xED, [0xD3] = 0xEE,
[0xD4] = 0xEB, [0xD5] = 0xEF, [0xD6] = 0xEC, [0xD7] = 0xBF,
[0xD8] = 0x80, [0xD9] = 0xFD, [0xDA] = 0xFE, [0xDB] = 0xFB,
[0xDC] = 0xFC, [0xDD] = 0xAD, [0xDE] = 0xAE, [0xDF] = 0x59,
[0xE0] = 0x44, [0xE1] = 0x45, [0xE2] = 0x42, [0xE3] = 0x46,
[0xE4] = 0x43, [0xE5] = 0x47, [0xE6] = 0x9C, [0xE7] = 0x48,
[0xE8] = 0x54, [0xE9] = 0x51, [0xEA] = 0x52, [0xEB] = 0x53,
[0xEC] = 0x58, [0xED] = 0x55, [0xEE] = 0x56, [0xEF] = 0x57,
[0xF0] = 0x8C, [0xF1] = 0x49, [0xF2] = 0xCD, [0xF3] = 0xCE,
[0xF4] = 0xCB, [0xF5] = 0xCF, [0xF6] = 0xCC, [0xF7] = 0xE1,
[0xF8] = 0x70, [0xF9] = 0xDD, [0xFA] = 0xDE, [0xFB] = 0xDB,
[0xFC] = 0xDC, [0xFD] = 0x8D, [0xFE] = 0x8E, [0xFF] = 0xDF
};
// conversion table from IBM037 to ISO-8859-1
static const unsigned char table_cp037_iso8859_1[256]
__attribute__ ((aligned (8))) =
{
[0x00] = 0x00, [0x01] = 0x01, [0x02] = 0x02, [0x03] = 0x03,
[0x04] = 0x9C, [0x05] = 0x09, [0x06] = 0x86, [0x07] = 0x7F,
[0x08] = 0x97, [0x09] = 0x8D, [0x0A] = 0x8E, [0x0B] = 0x0B,
[0x0C] = 0x0C, [0x0D] = 0x0D, [0x0E] = 0x0E, [0x0F] = 0x0F,
[0x10] = 0x10, [0x11] = 0x11, [0x12] = 0x12, [0x13] = 0x13,
[0x14] = 0x9D, [0x15] = 0x85, [0x16] = 0x08, [0x17] = 0x87,
[0x18] = 0x18, [0x19] = 0x19, [0x1A] = 0x92, [0x1B] = 0x8F,
[0x1C] = 0x1C, [0x1D] = 0x1D, [0x1E] = 0x1E, [0x1F] = 0x1F,
[0x20] = 0x80, [0x21] = 0x81, [0x22] = 0x82, [0x23] = 0x83,
[0x24] = 0x84, [0x25] = 0x0A, [0x26] = 0x17, [0x27] = 0x1B,
[0x28] = 0x88, [0x29] = 0x89, [0x2A] = 0x8A, [0x2B] = 0x8B,
[0x2C] = 0x8C, [0x2D] = 0x05, [0x2E] = 0x06, [0x2F] = 0x07,
[0x30] = 0x90, [0x31] = 0x91, [0x32] = 0x16, [0x33] = 0x93,
[0x34] = 0x94, [0x35] = 0x95, [0x36] = 0x96, [0x37] = 0x04,
[0x38] = 0x98, [0x39] = 0x99, [0x3A] = 0x9A, [0x3B] = 0x9B,
[0x3C] = 0x14, [0x3D] = 0x15, [0x3E] = 0x9E, [0x3F] = 0x1A,
[0x40] = 0x20, [0x41] = 0xA0, [0x42] = 0xE2, [0x43] = 0xE4,
[0x44] = 0xE0, [0x45] = 0xE1, [0x46] = 0xE3, [0x47] = 0xE5,
[0x48] = 0xE7, [0x49] = 0xF1, [0x4A] = 0xA2, [0x4B] = 0x2E,
[0x4C] = 0x3C, [0x4D] = 0x28, [0x4E] = 0x2B, [0x4F] = 0x7C,
[0x50] = 0x26, [0x51] = 0xE9, [0x52] = 0xEA, [0x53] = 0xEB,
[0x54] = 0xE8, [0x55] = 0xED, [0x56] = 0xEE, [0x57] = 0xEF,
[0x58] = 0xEC, [0x59] = 0xDF, [0x5A] = 0x21, [0x5B] = 0x24,
[0x5C] = 0x2A, [0x5D] = 0x29, [0x5E] = 0x3B, [0x5F] = 0xAC,
[0x60] = 0x2D, [0x61] = 0x2F, [0x62] = 0xC2, [0x63] = 0xC4,
[0x64] = 0xC0, [0x65] = 0xC1, [0x66] = 0xC3, [0x67] = 0xC5,
[0x68] = 0xC7, [0x69] = 0xD1, [0x6A] = 0xA6, [0x6B] = 0x2C,
[0x6C] = 0x25, [0x6D] = 0x5F, [0x6E] = 0x3E, [0x6F] = 0x3F,
[0x70] = 0xF8, [0x71] = 0xC9, [0x72] = 0xCA, [0x73] = 0xCB,
[0x74] = 0xC8, [0x75] = 0xCD, [0x76] = 0xCE, [0x77] = 0xCF,
[0x78] = 0xCC, [0x79] = 0x60, [0x7A] = 0x3A, [0x7B] = 0x23,
[0x7C] = 0x40, [0x7D] = 0x27, [0x7E] = 0x3D, [0x7F] = 0x22,
[0x80] = 0xD8, [0x81] = 0x61, [0x82] = 0x62, [0x83] = 0x63,
[0x84] = 0x64, [0x85] = 0x65, [0x86] = 0x66, [0x87] = 0x67,
[0x88] = 0x68, [0x89] = 0x69, [0x8A] = 0xAB, [0x8B] = 0xBB,
[0x8C] = 0xF0, [0x8D] = 0xFD, [0x8E] = 0xFE, [0x8F] = 0xB1,
[0x90] = 0xB0, [0x91] = 0x6A, [0x92] = 0x6B, [0x93] = 0x6C,
[0x94] = 0x6D, [0x95] = 0x6E, [0x96] = 0x6F, [0x97] = 0x70,
[0x98] = 0x71, [0x99] = 0x72, [0x9A] = 0xAA, [0x9B] = 0xBA,
[0x9C] = 0xE6, [0x9D] = 0xB8, [0x9E] = 0xC6, [0x9F] = 0xA4,
[0xA0] = 0xB5, [0xA1] = 0x7E, [0xA2] = 0x73, [0xA3] = 0x74,
[0xA4] = 0x75, [0xA5] = 0x76, [0xA6] = 0x77, [0xA7] = 0x78,
[0xA8] = 0x79, [0xA9] = 0x7A, [0xAA] = 0xA1, [0xAB] = 0xBF,
[0xAC] = 0xD0, [0xAD] = 0xDD, [0xAE] = 0xDE, [0xAF] = 0xAE,
[0xB0] = 0x5E, [0xB1] = 0xA3, [0xB2] = 0xA5, [0xB3] = 0xB7,
[0xB4] = 0xA9, [0xB5] = 0xA7, [0xB6] = 0xB6, [0xB7] = 0xBC,
[0xB8] = 0xBD, [0xB9] = 0xBE, [0xBA] = 0x5B, [0xBB] = 0x5D,
[0xBC] = 0xAF, [0xBD] = 0xA8, [0xBE] = 0xB4, [0xBF] = 0xD7,
[0xC0] = 0x7B, [0xC1] = 0x41, [0xC2] = 0x42, [0xC3] = 0x43,
[0xC4] = 0x44, [0xC5] = 0x45, [0xC6] = 0x46, [0xC7] = 0x47,
[0xC8] = 0x48, [0xC9] = 0x49, [0xCA] = 0xAD, [0xCB] = 0xF4,
[0xCC] = 0xF6, [0xCD] = 0xF2, [0xCE] = 0xF3, [0xCF] = 0xF5,
[0xD0] = 0x7D, [0xD1] = 0x4A, [0xD2] = 0x4B, [0xD3] = 0x4C,
[0xD4] = 0x4D, [0xD5] = 0x4E, [0xD6] = 0x4F, [0xD7] = 0x50,
[0xD8] = 0x51, [0xD9] = 0x52, [0xDA] = 0xB9, [0xDB] = 0xFB,
[0xDC] = 0xFC, [0xDD] = 0xF9, [0xDE] = 0xFA, [0xDF] = 0xFF,
[0xE0] = 0x5C, [0xE1] = 0xF7, [0xE2] = 0x53, [0xE3] = 0x54,
[0xE4] = 0x55, [0xE5] = 0x56, [0xE6] = 0x57, [0xE7] = 0x58,
[0xE8] = 0x59, [0xE9] = 0x5A, [0xEA] = 0xB2, [0xEB] = 0xD4,
[0xEC] = 0xD6, [0xED] = 0xD2, [0xEE] = 0xD3, [0xEF] = 0xD5,
[0xF0] = 0x30, [0xF1] = 0x31, [0xF2] = 0x32, [0xF3] = 0x33,
[0xF4] = 0x34, [0xF5] = 0x35, [0xF6] = 0x36, [0xF7] = 0x37,
[0xF8] = 0x38, [0xF9] = 0x39, [0xFA] = 0xB3, [0xFB] = 0xDB,
[0xFC] = 0xDC, [0xFD] = 0xD9, [0xFE] = 0xDA, [0xFF] = 0x9F
};
/* Definitions used in the body of the `gconv' function. */
#define CHARSET_NAME "ISO-8859-1//"
/* Names of the two conversion functions that are generated further down
   by including <iconv/loop.c> once per direction.  */
#define FROM_LOOP iso8859_1_to_cp037_z900
#define TO_LOOP cp037_to_iso8859_1_z900
/* NOTE(review): presumably asks <iconv/skeleton.c> to provide the default
   gconv_init/gconv_end, since this file defines neither -- confirm.  */
#define DEFINE_INIT 1
#define DEFINE_FINI 1
/* Both character sets are translated byte by byte through the 256 entry
   tables above, so one byte is needed on either side.  */
#define MIN_NEEDED_FROM 1
#define MIN_NEEDED_TO 1
/* The Z900 variant of troo forces us to always specify a test
   character which ends the translation.  So if we run into the
   situation where the translation has been interrupted due to the
   test character we translate the character by hand and jump back
   into the instruction.

   TABLE is the 256 byte translation table to use.  The macro is used as
   BODY of <iconv/loop.c>, so it reads inptr/inend/outptr/outend of the
   enclosing loop and writes the advanced inptr/outptr back.  At most
   min (inend - inptr, outend - outptr) bytes are translated per
   expansion.

   The asm statement stores the translated bytes through pOutput and
   loads from pInput and TABLE, so it carries a "memory" clobber to make
   those accesses known to the compiler.  */
#define TROO_LOOP(TABLE) \
  { \
    register const unsigned char test asm ("0") = 0; \
    register const unsigned char *pTable asm ("1") = TABLE; \
    register unsigned char *pOutput asm ("2") = outptr; \
    register uint64_t length asm ("3"); \
    const unsigned char* pInput = inptr; \
    uint64_t tmp; \
 \
    /* Never translate more than fits into the output buffer.  */ \
    length = (inend - inptr < outend - outptr \
              ? inend - inptr : outend - outptr); \
 \
    asm volatile ("0: \n\t" \
                  " troo %0,%1 \n\t" \
                  /* Leave the loop once troo reports completion.  */ \
                  " jz 1f \n\t" \
                  /* Re-issue troo if it stopped before finishing.  */ \
                  " jo 0b \n\t" \
                  /* Stopped on the test character: fetch the input \
                     byte, look it up in the table and store the \
                     result by hand ...  */ \
                  " llgc %3,0(%1) \n\t" \
                  " la %3,0(%3,%4) \n\t" \
                  " mvc 0(1,%0),0(%3) \n\t" \
                  /* ... then step input, output and remaining length \
                     past that byte and resume.  */ \
                  " aghi %1,1 \n\t" \
                  " aghi %0,1 \n\t" \
                  " aghi %2,-1 \n\t" \
                  " j 0b \n\t" \
                  "1: \n" \
 \
                  : "+a" (pOutput), "+a" (pInput), "+d" (length), "=&a" (tmp) \
                  : "a" (pTable), "d" (test) \
                  : "cc", "memory"); \
 \
    inptr = pInput; \
    outptr = pOutput; \
  }
/* First define the conversion function from ISO 8859-1 to CP037.
   Including <iconv/loop.c> with LOOPFCT and BODY set generates the
   function named by LOOPFCT.  */
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
#define LOOPFCT FROM_LOOP
#define BODY TROO_LOOP (table_iso8859_1_to_cp037)
#include <iconv/loop.c>

/* Next, define the conversion function from CP037 to ISO 8859-1.  The
   same macros are set again with the roles of FROM and TO swapped
   (NOTE(review): this relies on <iconv/loop.c> undefining them after
   use -- confirm).  BODY is written without a trailing semicolon so that
   both directions expand to exactly one compound statement.  */
#define MIN_NEEDED_INPUT MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
#define LOOPFCT TO_LOOP
#define BODY TROO_LOOP (table_cp037_iso8859_1)
#include <iconv/loop.c>

/* Now define the toplevel functions.  */
#include <iconv/skeleton.c>

View File

@ -0,0 +1,325 @@
/* Conversion between UTF-16 and UTF-32 BE/internal.
This module uses the Z9-109 variants of the Convert Unicode
instructions.
Copyright (C) 1997-2009 Free Software Foundation, Inc.
Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
Thanks to Daniel Appich who covered the relevant performance work
in his diploma thesis.
This is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <dlfcn.h>
#include <stdint.h>
#include <unistd.h>
#include <dl-procinfo.h>
#include <gconv.h>
/* UTF-32 big endian byte order mark. */
#define BOM_UTF32 0x0000feffu
/* UTF-16 big endian byte order mark. */
#define BOM_UTF16 0xfeff
/* gconv_init and gconv_end are written out explicitly in this file.
   NOTE(review): 0 presumably keeps <iconv/skeleton.c> from generating its
   default versions -- confirm.  */
#define DEFINE_INIT 0
#define DEFINE_FINI 0
/* Sizes in bytes.  The FROM side is UTF-16 (2 bytes, or 4 for a surrogate
   pair), the TO side is UTF-32 (always 4 bytes).  */
#define MIN_NEEDED_FROM 2
#define MAX_NEEDED_FROM 4
#define MIN_NEEDED_TO 4
/* Names of the two conversion functions generated from <iconv/loop.c>.  */
#define FROM_LOOP from_utf16_loop
#define TO_LOOP to_utf16_loop
/* `dir' is the local variable declared by PREPARE_LOOP.  */
#define FROM_DIRECTION (dir == from_utf16)
/* Statements executed before the conversion loop starts (NOTE(review):
   expanded by <iconv/skeleton.c>, which is not part of this file --
   confirm the exact place).  They load the direction and the BOM flag
   that gconv_init stored in step->__data.  If a BOM was requested, the
   conversion is not for internal use and this is the first invocation,
   the byte order mark of the target encoding is written to outbuf:
   BOM_UTF16 when converting to UTF-16, BOM_UTF32 otherwise.  When the
   mark does not fit, the enclosing function returns
   __GCONV_FULL_OUTPUT.  */
#define PREPARE_LOOP \
enum direction dir = ((struct utf16_data *) step->__data)->dir; \
int emit_bom = ((struct utf16_data *) step->__data)->emit_bom; \
\
if (emit_bom && !data->__internal_use \
&& data->__invocation_counter == 0) \
{ \
if (dir == to_utf16) \
{ \
/* Emit the UTF-16 Byte Order Mark. */ \
if (__builtin_expect (outbuf + 2 > outend, 0)) \
return __GCONV_FULL_OUTPUT; \
\
put16u (outbuf, BOM_UTF16); \
outbuf += 2; \
} \
else \
{ \
/* Emit the UTF-32 Byte Order Mark. */ \
if (__builtin_expect (outbuf + 4 > outend, 0)) \
return __GCONV_FULL_OUTPUT; \
\
put32u (outbuf, BOM_UTF32); \
outbuf += 4; \
} \
}
/* Direction of the transformation. */
enum direction
{
/* No supported from/to pair was recognized by gconv_init.  */
illegal_dir,
/* UTF-32BE or INTERNAL to UTF-16 or UTF-16BE.  */
to_utf16,
/* UTF-16BE to UTF-32, UTF-32BE or INTERNAL.  */
from_utf16
};
/* Per-step data allocated by gconv_init, stored in step->__data and
   released by gconv_end.  */
struct utf16_data
{
/* Which of the two conversion loops this step runs.  */
enum direction dir;
/* Nonzero if the target name asked for a byte order mark
   ("UTF-16//" or "UTF-32//").  */
int emit_bom;
};
extern int gconv_init (struct __gconv_step *step);
int
gconv_init (struct __gconv_step *step)
{
/* Determine which direction. */
struct utf16_data *new_data;
enum direction dir = illegal_dir;
int emit_bom;
int result;
emit_bom = (__strcasecmp (step->__to_name, "UTF-32//") == 0
|| __strcasecmp (step->__to_name, "UTF-16//") == 0);
if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0
&& (__strcasecmp (step->__to_name, "UTF-32//") == 0
|| __strcasecmp (step->__to_name, "UTF-32BE//") == 0
|| __strcasecmp (step->__to_name, "INTERNAL") == 0))
{
dir = from_utf16;
}
else if ((__strcasecmp (step->__to_name, "UTF-16//") == 0
|| __strcasecmp (step->__to_name, "UTF-16BE//") == 0)
&& (__strcasecmp (step->__from_name, "UTF-32BE//") == 0
|| __strcasecmp (step->__from_name, "INTERNAL") == 0))
{
dir = to_utf16;
}
result = __GCONV_NOCONV;
if (dir != illegal_dir)
{
new_data = (struct utf16_data *) malloc (sizeof (struct utf16_data));
result = __GCONV_NOMEM;
if (new_data != NULL)
{
new_data->dir = dir;
new_data->emit_bom = emit_bom;
step->__data = new_data;
if (dir == from_utf16)
{
step->__min_needed_from = MIN_NEEDED_FROM;
step->__max_needed_from = MIN_NEEDED_FROM;
step->__min_needed_to = MIN_NEEDED_TO;
step->__max_needed_to = MIN_NEEDED_TO;
}
else
{
step->__min_needed_from = MIN_NEEDED_TO;
step->__max_needed_from = MIN_NEEDED_TO;
step->__min_needed_to = MIN_NEEDED_FROM;
step->__max_needed_to = MIN_NEEDED_FROM;
}
step->__stateful = 0;
result = __GCONV_OK;
}
}
return result;
}
extern void gconv_end (struct __gconv_step *data);

/* Module destructor: releases the per-step data that gconv_init stored
   in the __data member of the step.  */
void
gconv_end (struct __gconv_step *data)
{
  void *step_data = data->__data;

  free (step_data);
}
/* The macro for the hardware loop. This is used for both
directions. */
#define HARDWARE_CONVERT(INSTRUCTION) \
{ \
register const unsigned char* pInput asm ("8") = inptr; \
register unsigned long long inlen asm ("9") = inend - inptr; \
register unsigned char* pOutput asm ("10") = outptr; \
register unsigned long long outlen asm("11") = outend - outptr; \
uint64_t cc = 0; \
\
asm volatile ("0: " INSTRUCTION " \n\t" \
" jo 0b \n\t" \
" ipm %2 \n" \
: "+a" (pOutput), "+a" (pInput), "+d" (cc), \
"+d" (outlen), "+d" (inlen) \
: \
: "cc", "memory"); \
\
inptr = pInput; \
outptr = pOutput; \
cc >>= 28; \
\
if (cc == 1) \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
else if (cc == 2) \
{ \
result = __GCONV_ILLEGAL_INPUT; \
break; \
} \
}
/* Conversion function from UTF-16 to UTF-32 internal/BE.  */

#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
#define LOOPFCT FROM_LOOP
/* The software routine is copied from utf-16.c (minus bytes
   swapping).

   If the hardware capability bit HWCAP_S390_ETF3EH is set, the whole
   buffer is handed to the cu24 instruction; otherwise one character is
   converted per loop iteration in C.  */
#define BODY \
  { \
    if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
      { \
        HARDWARE_CONVERT ("cu24 %0, %1, 1"); \
        if (inptr != inend) \
          { \
            /* Check if the third byte is \
               a valid start of a UTF-16 surrogate.  With exactly \
               three bytes left the third byte is inptr[2]; \
               inptr[3] would already be *inend.  */ \
            if (inend - inptr == 3 && (inptr[2] & 0xfc) != 0xdc) \
              STANDARD_FROM_LOOP_ERR_HANDLER (3); \
 \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        continue; \
      } \
 \
    uint16_t u1 = get16 (inptr); \
 \
    if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff) \
      { \
        /* No surrogate.  */ \
        put32 (outptr, u1); \
        inptr += 2; \
      } \
    else \
      { \
        /* It's a surrogate character.  At least the first word says \
           it is.  */ \
        if (__builtin_expect (inptr + 4 > inend, 0)) \
          { \
            /* We don't have enough input for another complete input \
               character.  */ \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
 \
        inptr += 2; \
        uint16_t u2 = get16 (inptr); \
        if (__builtin_expect (u2 < 0xdc00, 0) \
            || __builtin_expect (u2 > 0xdfff, 0)) \
          { \
            /* This is no valid second word for a surrogate.  Step \
               back so that the error refers to the first word.  */ \
            inptr -= 2; \
            STANDARD_FROM_LOOP_ERR_HANDLER (2); \
          } \
 \
        put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00)); \
        inptr += 2; \
      } \
    outptr += 4; \
  }
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
/* Conversion from UTF-32 internal/BE to UTF-16. */
#define MIN_NEEDED_INPUT MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
#define LOOPFCT TO_LOOP
/* The software routine is copied from utf-16.c (minus bytes
swapping).

If the hardware capability bit HWCAP_S390_ETF3EH is set, the whole
buffer is handed to the cu42 instruction; otherwise one character is
converted per loop iteration in C.  */
#define BODY \
{ \
if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
{ \
HARDWARE_CONVERT ("cu42 %0, %1"); \
\
/* Input left over at this point is reported as an incomplete \
character.  */ \
if (inptr != inend) \
{ \
result = __GCONV_INCOMPLETE_INPUT; \
break; \
} \
continue; \
} \
\
uint32_t c = get32 (inptr); \
\
/* NOTE(review): values 0xdc00..0xdfff are stored unchanged by this \
branch while 0xd800..0xdbff end up in the error branch below -- \
confirm that this asymmetry is intended.  */ \
if (__builtin_expect (c <= 0xd7ff, 1) \
|| (c >=0xdc00 && c <= 0xffff)) \
{ \
/* One UTF-16 code unit (two bytes). */ \
put16 (outptr, c); \
} \
else if (__builtin_expect (c >= 0x10000, 1) \
&& __builtin_expect (c <= 0x10ffff, 1)) \
{ \
/* A surrogate pair (four bytes). */ \
uint16_t zabcd = ((c & 0x1f0000) >> 16) - 1; \
uint16_t out; \
\
/* Generate a surrogate character. */ \
if (__builtin_expect (outptr + 4 > outend, 0)) \
{ \
/* Overflow in the output buffer. */ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
\
/* High surrogate; the low surrogate is stored below and the \
common `outptr += 2' at the end steps over it.  */ \
out = 0xd800; \
out |= (zabcd & 0xff) << 6; \
out |= (c >> 10) & 0x3f; \
put16 (outptr, out); \
outptr += 2; \
\
out = 0xdc00; \
out |= c & 0x3ff; \
put16 (outptr, out); \
} \
else \
{ \
STANDARD_TO_LOOP_ERR_HANDLER (4); \
} \
outptr += 2; \
inptr += 4; \
}
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>
#include <iconv/skeleton.c>

View File

@ -0,0 +1,463 @@
/* Conversion between UTF-8 and UTF-16.
This module uses the Z9-109 variants of the Convert Unicode
instructions.
Copyright (C) 1997-2009 Free Software Foundation, Inc.
Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
Thanks to Daniel Appich who covered the relevant performance work
in his diploma thesis.
This is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <dlfcn.h>
#include <stdint.h>
#include <unistd.h>
#include <dl-procinfo.h>
#include <gconv.h>
/* UTF-16 big endian byte order mark. */
#define BOM_UTF16 0xfeff
/* gconv_init and gconv_end are written out explicitly in this file.
   NOTE(review): 0 presumably keeps <iconv/skeleton.c> from generating its
   default versions -- confirm.  */
#define DEFINE_INIT 0
#define DEFINE_FINI 0
/* Sizes in bytes.  The FROM side is UTF-8 (1 to 4 bytes per character),
   the TO side is UTF-16 (2 bytes, or 4 for a surrogate pair).  */
#define MIN_NEEDED_FROM 1
#define MAX_NEEDED_FROM 4
#define MIN_NEEDED_TO 2
#define MAX_NEEDED_TO 4
/* Names of the two conversion functions generated from <iconv/loop.c>.  */
#define FROM_LOOP from_utf8_loop
#define TO_LOOP to_utf8_loop
/* `dir' is the local variable declared by PREPARE_LOOP.  */
#define FROM_DIRECTION (dir == from_utf8)
/* Statements executed before the conversion loop starts (NOTE(review):
   expanded by <iconv/skeleton.c>, which is not part of this file --
   confirm the exact place).  They load the direction and the BOM flag
   that gconv_init stored in step->__data.  If a BOM was requested, the
   conversion is not for internal use and this is the first invocation,
   the UTF-16 byte order mark is written to outbuf.  When the mark does
   not fit, the enclosing function returns __GCONV_FULL_OUTPUT.  */
#define PREPARE_LOOP \
enum direction dir = ((struct utf8_data *) step->__data)->dir; \
int emit_bom = ((struct utf8_data *) step->__data)->emit_bom; \
\
if (emit_bom && !data->__internal_use \
&& data->__invocation_counter == 0) \
{ \
/* Emit the UTF-16 Byte Order Mark. */ \
if (__builtin_expect (outbuf + 2 > outend, 0)) \
return __GCONV_FULL_OUTPUT; \
\
put16u (outbuf, BOM_UTF16); \
outbuf += 2; \
}
/* Direction of the transformation. */
enum direction
{
/* No supported from/to pair was recognized by gconv_init.  */
illegal_dir,
/* UTF-16BE to UTF-8.  */
to_utf8,
/* UTF-8 to UTF-16 or UTF-16BE.  */
from_utf8
};
/* Per-step data allocated by gconv_init, stored in step->__data and
   released by gconv_end.  */
struct utf8_data
{
/* Which of the two conversion loops this step runs.  */
enum direction dir;
/* Nonzero if the target name is "UTF-16//", i.e. a byte order mark is
   to be written.  */
int emit_bom;
};
extern int gconv_init (struct __gconv_step *step);
int
gconv_init (struct __gconv_step *step)
{
/* Determine which direction. */
struct utf8_data *new_data;
enum direction dir = illegal_dir;
int emit_bom;
int result;
emit_bom = (__strcasecmp (step->__to_name, "UTF-16//") == 0);
if (__strcasecmp (step->__from_name, "ISO-10646/UTF8/") == 0
&& (__strcasecmp (step->__to_name, "UTF-16//") == 0
|| __strcasecmp (step->__to_name, "UTF-16BE//") == 0))
{
dir = from_utf8;
}
else if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0
&& __strcasecmp (step->__to_name, "ISO-10646/UTF8/") == 0)
{
dir = to_utf8;
}
result = __GCONV_NOCONV;
if (dir != illegal_dir)
{
new_data = (struct utf8_data *) malloc (sizeof (struct utf8_data));
result = __GCONV_NOMEM;
if (new_data != NULL)
{
new_data->dir = dir;
new_data->emit_bom = emit_bom;
step->__data = new_data;
if (dir == from_utf8)
{
step->__min_needed_from = MIN_NEEDED_FROM;
step->__max_needed_from = MIN_NEEDED_FROM;
step->__min_needed_to = MIN_NEEDED_TO;
step->__max_needed_to = MIN_NEEDED_TO;
}
else
{
step->__min_needed_from = MIN_NEEDED_TO;
step->__max_needed_from = MIN_NEEDED_TO;
step->__min_needed_to = MIN_NEEDED_FROM;
step->__max_needed_to = MIN_NEEDED_FROM;
}
step->__stateful = 0;
result = __GCONV_OK;
}
}
return result;
}
extern void gconv_end (struct __gconv_step *data);

/* Module destructor: releases the per-step data that gconv_init stored
   in the __data member of the step.  */
void
gconv_end (struct __gconv_step *data)
{
  void *step_data = data->__data;

  free (step_data);
}
/* The macro for the hardware loop. This is used for both
directions. */
#define HARDWARE_CONVERT(INSTRUCTION) \
{ \
register const unsigned char* pInput asm ("8") = inptr; \
register unsigned long long inlen asm ("9") = inend - inptr; \
register unsigned char* pOutput asm ("10") = outptr; \
register unsigned long long outlen asm("11") = outend - outptr; \
uint64_t cc = 0; \
\
asm volatile ("0: " INSTRUCTION " \n\t" \
" jo 0b \n\t" \
" ipm %2 \n" \
: "+a" (pOutput), "+a" (pInput), "+d" (cc), \
"+d" (outlen), "+d" (inlen) \
: \
: "cc", "memory"); \
\
inptr = pInput; \
outptr = pOutput; \
cc >>= 28; \
\
if (cc == 1) \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
else if (cc == 2) \
{ \
result = __GCONV_ILLEGAL_INPUT; \
break; \
} \
}
/* Conversion function from UTF-8 to UTF-16.  */

#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
#define LOOPFCT FROM_LOOP
/* The software implementation is based on the code in gconv_simple.c.

   If the hardware capability bit HWCAP_S390_ETF3EH is set, the whole
   buffer is handed to the cu12 instruction; otherwise one character is
   converted per loop iteration in C.  The software path rejects a
   multi-byte sequence as soon as one of its continuation bytes does not
   have the form 10xxxxxx instead of converting it.  */
#define BODY \
  { \
    if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
      { \
        HARDWARE_CONVERT ("cu12 %0, %1, 1"); \
 \
        if (inptr != inend) \
          { \
            /* Input is left over.  It is an incomplete character if \
               every remaining byte after the first one is a \
               continuation byte, otherwise it is ill-formed.  */ \
            int i; \
            for (i = 1; inptr + i < inend; ++i) \
              if ((inptr[i] & 0xc0) != 0x80) \
                break; \
 \
            if (__builtin_expect (inptr + i == inend, 1)) \
              { \
                result = __GCONV_INCOMPLETE_INPUT; \
                break; \
              } \
            STANDARD_FROM_LOOP_ERR_HANDLER (i); \
          } \
        continue; \
      } \
 \
    /* Next input byte.  */ \
    uint16_t ch = *inptr; \
 \
    if (__builtin_expect (ch < 0x80, 1)) \
      { \
        /* One byte sequence.  */ \
        ++inptr; \
      } \
    else \
      { \
        uint_fast32_t cnt; \
        uint_fast32_t i; \
 \
        if (ch >= 0xc2 && ch < 0xe0) \
          { \
            /* We expect two bytes.  The first byte cannot be 0xc0 \
               or 0xc1, otherwise the wide character could have been \
               represented using a single byte.  */ \
            cnt = 2; \
            ch &= 0x1f; \
          } \
        else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
          { \
            /* We expect three bytes.  */ \
            cnt = 3; \
            ch &= 0x0f; \
          } \
        else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
          { \
            /* We expect four bytes.  */ \
            cnt = 4; \
            ch &= 0x07; \
          } \
        else \
          { \
            /* Search the end of this ill-formed UTF-8 character.  This \
               is the next byte with (x & 0xc0) != 0x80.  */ \
            i = 0; \
            do \
              ++i; \
            while (inptr + i < inend \
                   && (*(inptr + i) & 0xc0) == 0x80 \
                   && i < 5); \
 \
          errout: \
            /* Entered with i holding the number of bytes that belong \
               to the ill-formed sequence.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (i); \
          } \
 \
        if (__builtin_expect (inptr + cnt > inend, 0)) \
          { \
            /* We don't have enough input.  But before we report \
               that check that all the bytes are correct.  */ \
            for (i = 1; inptr + i < inend; ++i) \
              if ((inptr[i] & 0xc0) != 0x80) \
                break; \
 \
            if (__builtin_expect (inptr + i == inend, 1)) \
              { \
                result = __GCONV_INCOMPLETE_INPUT; \
                break; \
              } \
 \
            goto errout; \
          } \
 \
        /* The whole sequence is available.  Every continuation byte \
           must have the form 10xxxxxx; otherwise report the bytes \
           before the offending one as ill-formed.  */ \
        for (i = 1; i < cnt; ++i) \
          if ((inptr[i] & 0xc0) != 0x80) \
            break; \
 \
        if (__builtin_expect (i < cnt, 0)) \
          goto errout; \
 \
        if (cnt == 4) \
          { \
            /* For 4 byte UTF-8 chars two UTF-16 chars (high and \
               low) are needed.  */ \
            uint16_t zabcd, high, low; \
 \
            if (__builtin_expect (outptr + 4 > outend, 0)) \
              { \
                /* Overflow in the output buffer.  */ \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
 \
            /* See Principles of Operations cu12.  */ \
            zabcd = (((inptr[0] & 0x7) << 2) | \
                     ((inptr[1] & 0x30) >> 4)) - 1; \
 \
            /* z-bit must be zero after subtracting 1.  */ \
            if (zabcd & 0x10) \
              STANDARD_FROM_LOOP_ERR_HANDLER (4); \
 \
            high = (uint16_t)(0xd8 << 8); /* high surrogate id */ \
            high |= zabcd << 6; /* abcd bits */ \
            high |= (inptr[1] & 0xf) << 2; /* efgh bits */ \
            high |= (inptr[2] & 0x30) >> 4; /* ij bits */ \
 \
            low = (uint16_t)(0xdc << 8); /* low surrogate id */ \
            low |= ((uint16_t)inptr[2] & 0xc) << 6; /* kl bits */ \
            low |= (inptr[2] & 0x3) << 6; /* mn bits */ \
            low |= inptr[3] & 0x3f; /* opqrst bits */ \
 \
            put16 (outptr, high); \
            outptr += 2; \
            put16 (outptr, low); \
            outptr += 2; \
            inptr += 4; \
            continue; \
          } \
        else \
          { \
            /* Collect the payload bits of the remaining bytes; they \
               have been validated above.  */ \
            for (i = 1; i < cnt; ++i) \
              { \
                uint16_t byte = inptr[i]; \
 \
                ch <<= 6; \
                ch |= byte & 0x3f; \
              } \
            inptr += cnt; \
          } \
      } \
    /* Now adjust the pointers and store the result.  put16 is used \
       like for the surrogate pair above.  */ \
    put16 (outptr, ch); \
    outptr += sizeof (uint16_t); \
  }
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
/* Conversion from UTF-16 to UTF-8. */
#define MIN_NEEDED_INPUT MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
#define LOOPFCT TO_LOOP
/* Loop body for the UTF-16 to UTF-8 direction.

   When HWCAP_S390_ETF3EH is set in dl_hwcap the buffer is handed to
   the cu21 instruction through HARDWARE_CONVERT.  Otherwise one UTF-16
   code unit (or one surrogate pair) is converted per iteration in
   software; that routine is based on the functionality of the S/390
   hardware instruction (cu21) as described in the Principles of
   Operation.

   The body uses inptr, inend, outptr, outend and result from the
   including loop.  Problems are reported either by setting result and
   leaving the loop with `break', or through
   STANDARD_TO_LOOP_ERR_HANDLER.  Input is only marked as consumed
   (inptr advanced) once the corresponding output has been written.  */
#define BODY \
  { \
    if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
      { \
        HARDWARE_CONVERT ("cu21 %0, %1"); \
        if (inptr != inend) \
          { \
            /* The instruction stopped in front of a partial character. \
               If exactly three bytes are left, the third byte \
               (inptr[2], the last valid one) is the first byte of the \
               unit that has to be a low surrogate, i.e. 0xdc..0xdf. \
               inptr[3] would already be outside the buffer. */ \
            if (inend - inptr == 3 && (inptr[2] & 0xfc) != 0xdc) \
              STANDARD_TO_LOOP_ERR_HANDLER (3); \
            \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        continue; \
      } \
    \
    uint16_t c = get16 (inptr); \
    \
    if (__builtin_expect (c <= 0x007f, 1)) \
      { \
        /* Single byte UTF-8 char. */ \
        *outptr = c & 0xff; \
        outptr++; \
      } \
    else if (c >= 0x0080 && c <= 0x07ff) \
      { \
        /* Two byte UTF-8 char. */ \
        \
        if (__builtin_expect (outptr + 2 > outend, 0)) \
          { \
            /* Overflow in the output buffer. */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        \
        outptr[0] = 0xc0; \
        outptr[0] |= c >> 6; \
        \
        outptr[1] = 0x80; \
        outptr[1] |= c & 0x3f; \
        \
        outptr += 2; \
      } \
    else if ((c >= 0x0800 && c <= 0xd7ff) || c > 0xdfff) \
      { \
        /* Three byte UTF-8 char.  This covers the BMP on both sides \
           of the surrogate area: 0x0800..0xd7ff and 0xe000..0xffff. */ \
        \
        if (__builtin_expect (outptr + 3 > outend, 0)) \
          { \
            /* Overflow in the output buffer. */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        outptr[0] = 0xe0; \
        outptr[0] |= c >> 12; \
        \
        outptr[1] = 0x80; \
        outptr[1] |= (c >> 6) & 0x3f; \
        \
        outptr[2] = 0x80; \
        outptr[2] |= c & 0x3f; \
        \
        outptr += 3; \
      } \
    else if (c >= 0xd800 && c <= 0xdbff) \
      { \
        /* Four byte UTF-8 char, built from a surrogate pair. */ \
        uint16_t low, uvwxy; \
        \
        if (__builtin_expect (outptr + 4 > outend, 0)) \
          { \
            /* Overflow in the output buffer. */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        /* Both units of the pair have to be available.  Test this \
           before moving inptr so that the high surrogate is not \
           reported as consumed when the low one is still missing. */ \
        if (__builtin_expect (inptr + 4 > inend, 0)) \
          { \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        \
        inptr += 2; \
        low = get16 (inptr); \
        \
        if ((low & 0xfc00) != 0xdc00) \
          { \
            /* Not a low surrogate: step back so that the error is \
               reported at the high surrogate. */ \
            inptr -= 2; \
            STANDARD_TO_LOOP_ERR_HANDLER (2); \
          } \
        /* The four bits following the high surrogate id hold the \
           plane number minus one. */ \
        uvwxy = ((c >> 6) & 0xf) + 1; \
        outptr[0] = 0xf0; \
        outptr[0] |= uvwxy >> 2; \
        \
        outptr[1] = 0x80; \
        outptr[1] |= (uvwxy << 4) & 0x30; \
        outptr[1] |= (c >> 2) & 0x0f; \
        \
        outptr[2] = 0x80; \
        outptr[2] |= (c & 0x03) << 4; \
        outptr[2] |= (low >> 6) & 0x0f; \
        \
        outptr[3] = 0x80; \
        outptr[3] |= low & 0x3f; \
        \
        outptr += 4; \
      } \
    else \
      { \
        /* Only 0xdc00..0xdfff gets here: a low surrogate without a \
           preceding high surrogate. */ \
        STANDARD_TO_LOOP_ERR_HANDLER (2); \
      } \
    /* Skip the unit just handled (the low surrogate for a pair). */ \
    inptr += 2; \
  }
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>

View File

@ -0,0 +1,508 @@
/* Conversion between UTF-8 and UTF-32 BE/internal.
This module uses the Z9-109 variants of the Convert Unicode
instructions.
Copyright (C) 1997-2009 Free Software Foundation, Inc.
Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
Thanks to Daniel Appich who covered the relevant performance work
in his diploma thesis.
This is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <dlfcn.h>
#include <stdint.h>
#include <unistd.h>
#include <dl-procinfo.h>
#include <gconv.h>
/* UTF-32 big endian byte order mark. */
#define BOM 0x0000feffu
#define DEFINE_INIT 0
#define DEFINE_FINI 0
/* These definitions apply to the UTF-8 to UTF-32 direction. The
software implementation for UTF-8 still supports multibyte
characters up to 6 bytes whereas the hardware variant does not. */
#define MIN_NEEDED_FROM 1
#define MAX_NEEDED_FROM 6
#define MIN_NEEDED_TO 4
#define FROM_LOOP from_utf8_loop
#define TO_LOOP to_utf8_loop
#define FROM_DIRECTION (dir == from_utf8)
/* Per-call setup expanded by the skeleton before the conversion loops
   run.  It fetches the direction and the BOM flag from the private
   data hanging off step->__data.  On the very first invocation for
   non-internal use it writes the UTF-32 byte order mark, returning
   __GCONV_FULL_OUTPUT if fewer than four output bytes are free.  */
#define PREPARE_LOOP \
  const struct utf8_data *conv_data \
    = (const struct utf8_data *) step->__data; \
  enum direction dir = conv_data->dir; \
  int emit_bom = conv_data->emit_bom; \
  \
  if (emit_bom && !data->__internal_use \
      && data->__invocation_counter == 0) \
    { \
      /* The mark needs four bytes of room. */ \
      if (__builtin_expect (outend - outbuf < 4, 0)) \
        return __GCONV_FULL_OUTPUT; \
      \
      put32u (outbuf, BOM); \
      outbuf += 4; \
    }
/* Direction of the transformation. */
enum direction
{
illegal_dir,
to_utf8,
from_utf8
};
/* Private per-step data.  gconv_init allocates one instance with malloc
   and stores it in step->__data; gconv_end frees it.  */
struct utf8_data
{
/* Conversion direction selected by gconv_init.  */
enum direction dir;
/* Nonzero when the target name is "UTF-32//".  PREPARE_LOOP reads it to
   decide whether a byte order mark has to be written.  */
int emit_bom;
};
extern int gconv_init (struct __gconv_step *step);
int
gconv_init (struct __gconv_step *step)
{
/* Determine which direction. */
struct utf8_data *new_data;
enum direction dir = illegal_dir;
int emit_bom;
int result;
emit_bom = (__strcasecmp (step->__to_name, "UTF-32//") == 0);
if (__strcasecmp (step->__from_name, "ISO-10646/UTF8/") == 0
&& (__strcasecmp (step->__to_name, "UTF-32//") == 0
|| __strcasecmp (step->__to_name, "UTF-32BE//") == 0
|| __strcasecmp (step->__to_name, "INTERNAL") == 0))
{
dir = from_utf8;
}
else if (__strcasecmp (step->__to_name, "ISO-10646/UTF8/") == 0
&& (__strcasecmp (step->__from_name, "UTF-32BE//") == 0
|| __strcasecmp (step->__from_name, "INTERNAL") == 0))
{
dir = to_utf8;
}
result = __GCONV_NOCONV;
if (dir != illegal_dir)
{
new_data = (struct utf8_data *) malloc (sizeof (struct utf8_data));
result = __GCONV_NOMEM;
if (new_data != NULL)
{
new_data->dir = dir;
new_data->emit_bom = emit_bom;
step->__data = new_data;
if (dir == from_utf8)
{
step->__min_needed_from = MIN_NEEDED_FROM;
step->__max_needed_from = MIN_NEEDED_FROM;
step->__min_needed_to = MIN_NEEDED_TO;
step->__max_needed_to = MIN_NEEDED_TO;
}
else
{
step->__min_needed_from = MIN_NEEDED_TO;
step->__max_needed_from = MIN_NEEDED_TO;
step->__min_needed_to = MIN_NEEDED_FROM;
step->__max_needed_to = MIN_NEEDED_FROM;
}
step->__stateful = 0;
result = __GCONV_OK;
}
}
return result;
}
extern void gconv_end (struct __gconv_step *data);

/* Counterpart of gconv_init: hand the private block stored in
   DATA->__data back to the allocator.  */
void
gconv_end (struct __gconv_step *data)
{
  void *private_data = data->__data;

  free (private_data);
}
/* The macro for the hardware loop. This is used for both
directions. */
#define HARDWARE_CONVERT(INSTRUCTION) \
{ \
register const unsigned char* pInput asm ("8") = inptr; \
register unsigned long long inlen asm ("9") = inend - inptr; \
register unsigned char* pOutput asm ("10") = outptr; \
register unsigned long long outlen asm("11") = outend - outptr; \
uint64_t cc = 0; \
\
asm volatile ("0: " INSTRUCTION " \n\t" \
" jo 0b \n\t" \
" ipm %2 \n" \
: "+a" (pOutput), "+a" (pInput), "+d" (cc), \
"+d" (outlen), "+d" (inlen) \
: \
: "cc", "memory"); \
\
inptr = pInput; \
outptr = pOutput; \
cc >>= 28; \
\
if (cc == 1) \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
else if (cc == 2) \
{ \
result = __GCONV_ILLEGAL_INPUT; \
break; \
} \
}
/* Conversion function from UTF-8 to UTF-32 internal/BE. */
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
#define LOOPFCT FROM_LOOP
/* The software routine is copied from gconv_simple.c. */
#define BODY \
{ \
if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
{ \
HARDWARE_CONVERT ("cu14 %0, %1, 1"); \
\
if (inptr != inend) \
{ \
int i; \
for (i = 1; inptr + i < inend; ++i) \
if ((inptr[i] & 0xc0) != 0x80) \
break; \
\
if (__builtin_expect (inptr + i == inend, 1)) \
{ \
result = __GCONV_INCOMPLETE_INPUT; \
break; \
} \
STANDARD_FROM_LOOP_ERR_HANDLER (i); \
} \
continue; \
} \
\
/* Next input byte. */ \
uint32_t ch = *inptr; \
\
if (__builtin_expect (ch < 0x80, 1)) \
{ \
/* One byte sequence. */ \
++inptr; \
} \
else \
{ \
uint_fast32_t cnt; \
uint_fast32_t i; \
\
if (ch >= 0xc2 && ch < 0xe0) \
{ \
/* We expect two bytes. The first byte cannot be 0xc0 or \
0xc1, otherwise the wide character could have been \
represented using a single byte. */ \
cnt = 2; \
ch &= 0x1f; \
} \
else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \
{ \
/* We expect three bytes. */ \
cnt = 3; \
ch &= 0x0f; \
} \
else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1)) \
{ \
/* We expect four bytes. */ \
cnt = 4; \
ch &= 0x07; \
} \
else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1)) \
{ \
/* We expect five bytes. */ \
cnt = 5; \
ch &= 0x03; \
} \
else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1)) \
{ \
/* We expect six bytes. */ \
cnt = 6; \
ch &= 0x01; \
} \
else \
{ \
/* Search the end of this ill-formed UTF-8 character. This \
is the next byte with (x & 0xc0) != 0x80. */ \
i = 0; \
do \
++i; \
while (inptr + i < inend \
&& (*(inptr + i) & 0xc0) == 0x80 \
&& i < 5); \
\
errout: \
STANDARD_FROM_LOOP_ERR_HANDLER (i); \
} \
\
if (__builtin_expect (inptr + cnt > inend, 0)) \
{ \
/* We don't have enough input. But before we report \
that check that all the bytes are correct. */ \
for (i = 1; inptr + i < inend; ++i) \
if ((inptr[i] & 0xc0) != 0x80) \
break; \
\
if (__builtin_expect (inptr + i == inend, 1)) \
{ \
result = __GCONV_INCOMPLETE_INPUT; \
break; \
} \
\
goto errout; \
} \
\
/* Read the possible remaining bytes. */ \
for (i = 1; i < cnt; ++i) \
{ \
uint32_t byte = inptr[i]; \
\
if ((byte & 0xc0) != 0x80) \
/* This is an illegal encoding. */ \
break; \
\
ch <<= 6; \
ch |= byte & 0x3f; \
} \
\
/* If i < cnt, some trail byte was not >= 0x80, < 0xc0. \
If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could \
have been represented with fewer than cnt bytes. */ \
if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)) \
{ \
/* This is an illegal encoding. */ \
goto errout; \
} \
\
inptr += cnt; \
} \
\
/* Now adjust the pointers and store the result. */ \
*((uint32_t *) outptr) = ch; \
outptr += sizeof (uint32_t); \
}
#define LOOP_NEED_FLAGS
/* Save a truncated UTF-8 sequence in the conversion state.

   The bytes from *inptrp up to inend are folded into a partial UCS4
   value.  state->__count receives the number of bytes seen in its low
   byte and the length announced by the lead byte shifted left by 8;
   state->__value.__wch receives the partial value, already shifted as
   if the missing bytes had contributed zero bits.  *inptrp is advanced
   to inend.  */
#define STORE_REST \
  { \
    /* The lead byte is taken to be valid and to announce more bytes \
       than the buffer still holds. */ \
    wint_t ch = **inptrp; \
    /* Defaults describe the six byte form; shorter forms override. */ \
    size_t cnt = 6; \
    wint_t payload_mask = 0x01; \
    size_t missing; \
    \
    state->__count = inend - *inptrp; \
    \
    if (ch >= 0xc2 && ch < 0xe0) \
      { \
        /* Two bytes.  0xc0 and 0xc1 cannot start a sequence since \
           the value would fit into a single byte. */ \
        cnt = 2; \
        payload_mask = 0x1f; \
      } \
    else if ((ch & 0xf0) == 0xe0) \
      { \
        /* Three bytes. */ \
        cnt = 3; \
        payload_mask = 0x0f; \
      } \
    else if ((ch & 0xf8) == 0xf0) \
      { \
        /* Four bytes. */ \
        cnt = 4; \
        payload_mask = 0x07; \
      } \
    else if ((ch & 0xfc) == 0xf8) \
      { \
        /* Five bytes. */ \
        cnt = 5; \
        payload_mask = 0x03; \
      } \
    ch &= payload_mask; \
    \
    /* The lead byte is accounted for; fold in the bytes we do have. */ \
    missing = cnt - 1; \
    for (++(*inptrp); *inptrp < inend; ++(*inptrp)) \
      { \
        ch = (ch << 6) | (**inptrp & 0x3f); \
        --missing; \
      } \
    \
    /* Leave room for the bytes that have not arrived yet. */ \
    ch <<= missing * 6; \
    \
    /* Remember the announced total length next to the seen count. */ \
    state->__count |= cnt << 8; \
    \
    /* Remember the partial value. */ \
    state->__value.__wch = ch; \
  }
/* Rebuild the raw bytes of a sequence that was saved in the conversion
   state: the announced length is taken from the upper part of
   state->__count, the number of bytes already seen from its low byte
   (stored into inlen), and the partial value from
   state->__value.__wch.  The bytes are written to bytebuf.  */
#define UNPACK_BYTES \
  { \
    /* Lead byte markers for sequences of 2, 3, 4, 5 and 6 bytes. */ \
    static const unsigned char lead_marker[5] \
      = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc }; \
    wint_t bits = state->__value.__wch; \
    size_t pos = state->__count >> 8; \
    \
    inlen = state->__count & 255; \
    \
    bytebuf[0] = lead_marker[pos - 2]; \
    \
    /* Walk from the last position of the sequence down to position 1. \
       Only positions that had been seen get a continuation byte; the \
       value is shifted for every position either way. */ \
    do \
      { \
        --pos; \
        if (pos < inlen) \
          bytebuf[pos] = (bits & 0x3f) | 0x80; \
        bits >>= 6; \
      } \
    while (pos > 1); \
    \
    /* What is left of the value belongs into the lead byte. */ \
    bytebuf[0] |= bits; \
  }
/* Discard a saved partial sequence by zeroing state->__count, the field
   in which STORE_REST records the seen and announced byte counts.  */
#define CLEAR_STATE \
state->__count = 0
#include <iconv/loop.c>
/* Conversion from UTF-32 internal/BE to UTF-8. */
#define MIN_NEEDED_INPUT MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
#define LOOPFCT TO_LOOP
/* Loop body for the UTF-32 to UTF-8 direction.

   When HWCAP_S390_ETF3EH is set in dl_hwcap the buffer is handed to
   the cu41 instruction through HARDWARE_CONVERT.  Otherwise one 32-bit
   value is converted per iteration in software.  The software routine
   mimics the S/390 cu41 instruction, with one addition: every value in
   the surrogate range 0xd800..0xdfff is rejected, since UTF-8 must not
   contain encoded surrogates.

   The body uses inptr, inend, outptr, outend and result from the
   including loop.  Problems are reported either by setting result and
   leaving the loop with `break', or through
   STANDARD_TO_LOOP_ERR_HANDLER.  */
#define BODY \
  { \
    if (GLRO (dl_hwcap) & HWCAP_S390_ETF3EH) \
      { \
        HARDWARE_CONVERT ("cu41 %0, %1"); \
        \
        /* Input left over without an error being reported: the \
           trailing bytes do not form a complete 32-bit value. */ \
        if (inptr != inend) \
          { \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        continue; \
      } \
    \
    uint32_t wc = *((const uint32_t *) inptr); \
    \
    if (__builtin_expect (wc <= 0x7f, 1)) \
      { \
        /* Single UTF-8 char. */ \
        *outptr = (uint8_t)wc; \
        outptr++; \
      } \
    else if (wc <= 0x7ff) \
      { \
        /* Two UTF-8 chars. */ \
        if (__builtin_expect (outptr + 2 > outend, 0)) \
          { \
            /* Overflow in the output buffer. */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        \
        outptr[0] = 0xc0; \
        outptr[0] |= wc >> 6; \
        \
        outptr[1] = 0x80; \
        outptr[1] |= wc & 0x3f; \
        \
        outptr += 2; \
      } \
    else if (wc <= 0xffff) \
      { \
        /* Three UTF-8 chars. */ \
        if (__builtin_expect (wc >= 0xd800 && wc <= 0xdfff, 0)) \
          { \
            /* Surrogates are no characters of their own and have no \
               valid UTF-8 form.  The handler either leaves the loop \
               or skips the value and continues, so the code below is \
               not reached for them. */ \
            STANDARD_TO_LOOP_ERR_HANDLER (4); \
          } \
        if (__builtin_expect (outptr + 3 > outend, 0)) \
          { \
            /* Overflow in the output buffer. */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        outptr[0] = 0xe0; \
        outptr[0] |= wc >> 12; \
        \
        outptr[1] = 0x80; \
        outptr[1] |= (wc >> 6) & 0x3f; \
        \
        outptr[2] = 0x80; \
        outptr[2] |= wc & 0x3f; \
        \
        outptr += 3; \
      } \
    else if (wc <= 0x10ffff) \
      { \
        /* Four UTF-8 chars. */ \
        if (__builtin_expect (outptr + 4 > outend, 0)) \
          { \
            /* Overflow in the output buffer. */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        outptr[0] = 0xf0; \
        outptr[0] |= wc >> 18; \
        \
        outptr[1] = 0x80; \
        outptr[1] |= (wc >> 12) & 0x3f; \
        \
        outptr[2] = 0x80; \
        outptr[2] |= (wc >> 6) & 0x3f; \
        \
        outptr[3] = 0x80; \
        outptr[3] |= wc & 0x3f; \
        \
        outptr += 4; \
      } \
    else \
      { \
        /* Beyond 0x10ffff: not a Unicode code point. */ \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    inptr += 4; \
  }
#define LOOP_NEED_FLAGS
#include <iconv/loop.c>
#include <iconv/skeleton.c>