scuffed-code/icu4c/source/extra/uconv/uconv.cpp

1101 lines
34 KiB
C++
Raw Normal View History

/*****************************************************************************
*
* Copyright (C) 1999-2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************/
/*
* uconv(1): an iconv(1)-like converter using ICU.
*
* Original code by Jonas Utterström <jonas.utterstrom@vittran.norrnod.se>
* contributed in 1999.
*
* Conversion to the C conversion API and many improvements by
* Yves Arrouye <yves@realnames.com>, current maintainer.
*
*/
#include <unicode/utypes.h>
#include <unicode/ucnv.h>
#include <unicode/unistr.h>
#include <unicode/translit.h>
#include <unicode/uclean.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include "cmemory.h"
#include "cstring.h"
#include "ustrfmt.h"
#include "unicode/uwmsg.h"
#ifdef WIN32
#include <string.h>
#include <io.h>
#include <fcntl.h>
#endif
#ifdef UCONVMSG_LINK
/* below from the README */
#include "unicode/utypes.h"
#include "unicode/udata.h"
U_CFUNC char uconvmsg_dat[];
#endif
/* Default value of the conversion block size; main() uses it unless
   -b / --block-size supplies another value. */
#define DEFAULT_BUFSZ 4096
/* Name of the message bundle handed to u_wmsg_setPath() (and, when
   UCONVMSG_LINK is defined, to udata_setAppData()). */
#define UCONVMSG "uconvmsg"
static UResourceBundle *gBundle = 0; /* Bundle containing messages. */
/*
* Initialize the message bundle so that message strings can be fetched
* by u_wmsg().
*
*/
static void initMsg(const char *pname) {
static int ps = 0;
if (!ps) {
char dataPath[2048]; /* XXX Sloppy: should be PATH_MAX. */
UErrorCode err = U_ZERO_ERROR;
ps = 1;
/* Set up our static data - if any */
#ifdef UCONVMSG_LINK
udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
if (U_FAILURE(err)) {
fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
pname, u_errorName(err));
err = U_ZERO_ERROR; /* It may still fail */
}
#endif
/* Get messages. */
gBundle = u_wmsg_setPath(UCONVMSG, &err);
if (U_FAILURE(err)) {
fprintf(stderr,
"%s: warning: couldn't open bundle %s: %s\n",
pname, UCONVMSG, u_errorName(err));
#ifdef UCONVMSG_LINK
fprintf(stderr,
"%s: setAppData was called, internal data %s failed to load\n",
pname, UCONVMSG);
#endif
err = U_ZERO_ERROR;
/* that was try #1, try again with a path */
uprv_strcpy(dataPath, u_getDataDirectory());
uprv_strcat(dataPath, U_FILE_SEP_STRING);
uprv_strcat(dataPath, UCONVMSG);
gBundle = u_wmsg_setPath(dataPath, &err);
if (U_FAILURE(err)) {
fprintf(stderr,
"%s: warning: still couldn't open bundle %s: %s\n",
pname, dataPath, u_errorName(err));
2002-04-02 01:17:28 +00:00
fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
}
}
}
}
/* Mapping of callback names to the callbacks passed to the converter
API. Each record pairs the callback (and its context pointer) for the
from-Unicode direction with the one for the to-Unicode direction, so a
single name can configure either or both converters. Records are looked
up by name with findCallback(). */
static struct callback_ent {
/* Name used to select this record. */
const char *name;
/* Callback and context for converting from Unicode. */
UConverterFromUCallback fromu;
const void *fromuctxt;
/* Callback and context for converting to Unicode. */
UConverterToUCallback tou;
const void *touctxt;
} transcode_callbacks[] = {
{ "substitute",
UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
{ "skip",
UCNV_FROM_U_CALLBACK_SKIP, 0,
UCNV_TO_U_CALLBACK_SKIP, 0 },
{ "stop",
UCNV_FROM_U_CALLBACK_STOP, 0,
UCNV_TO_U_CALLBACK_STOP, 0 },
{ "escape",
UCNV_FROM_U_CALLBACK_ESCAPE, 0,
UCNV_TO_U_CALLBACK_ESCAPE, 0},
{ "escape-icu",
UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
{ "escape-java",
UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
{ "escape-c",
UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
/* "escape-xml" uses the same context as "escape-xml-hex" below. */
{ "escape-xml",
UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
{ "escape-xml-hex",
UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
{ "escape-xml-dec",
UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
{ "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
};
/* Return a pointer to a callback record given its name. */
static const struct callback_ent *findCallback(const char *name) {
int i, count =
sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
/* We'll do a linear search, there aren't many of them and bsearch()
may not be that portable. */
for (i = 0; i < count; ++i) {
if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
return &transcode_callbacks[i];
}
}
return 0;
}
/* Print converter information. If lookfor is set, only that converter will
   be printed, otherwise all converters will be printed. If canon is non
   zero, tags and aliases for each converter are printed too, in the format
   expected for convrtrs.txt(5).

   pname is the program name, used to initialize the message bundle before
   any error message is emitted.

   Returns 0 on success and -1 on failure (after reporting the problem on
   stderr). The temporary table of standard names is released on every
   path out of the function. */
static int printConverters(const char *pname, const char *lookfor,
    int canon)
{
    UErrorCode err = U_ZERO_ERROR;
    int32_t num;
    uint16_t num_stds;
    const char **stds;

    /* If there is a specified name, just handle that now. */
    if (lookfor) {
        if (!canon) {
            printf("%s\n", lookfor);
            return 0;
        } else {
            /* Because we are printing a canonical name, we need the
               true converter name. We've done that already except for
               the default name (because we want to print the exact
               name one would get when calling ucnv_getDefaultName()
               in non-canon mode). But since we do not know at this
               point if we have the default name or something else, we
               need to normalize again to the canonical converter
               name. */
            const char *truename = ucnv_getAlias(lookfor, 0, &err);
            if (U_SUCCESS(err) && truename) {
                lookfor = truename;
            } else {
                /* Keep the name we were given: replacing it with a null
                   pointer would silently switch to listing everything. */
                err = U_ZERO_ERROR;
            }
        }
    }

    /* Print converter names. We come here for one of two reasons: we
       are printing all the names (lookfor was null), or we have a
       single converter to print but in canon mode, hence we need to
       get to it in order to print everything. */
    num = ucnv_countAvailable();
    if (num <= 0) {
        initMsg(pname);
        u_wmsg(stderr, "cantGetNames");
        return -1;
    }
    if (lookfor) {
        num = 1;                /* We know where we want to be. */
    }

    num_stds = ucnv_countStandards();
    stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
    if (!stds) {
        initMsg(pname);
        u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
        return -1;
    } else {
        uint16_t s;

        for (s = 0; s < num_stds; ++s) {
            stds[s] = ucnv_getStandard(s, &err);
            if (U_FAILURE(err)) {
                initMsg(pname);
                u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
                uprv_free(stds);
                return -1;
            }
        }
    }

    for (int32_t i = 0; i < num; i++) {
        const char *name;
        uint16_t num_aliases;

        /* Set the name either to what we are looking for, or
           to the current converter name. */
        if (lookfor) {
            name = lookfor;
        } else {
            name = ucnv_getAvailableName(i);
        }

        /* Get all the aliases associated to the name. */
        err = U_ZERO_ERROR;
        num_aliases = ucnv_countAliases(name, &err);
        if (U_FAILURE(err)) {
            printf("%s", name);

            /* The length includes the terminating NUL so that getBuffer()
               can be handed to u_wmsg() as a terminated string. */
            UnicodeString str(name, (int32_t)(uprv_strlen(name) + 1));
            putchar('\t');
            initMsg(pname);
            u_wmsg(stderr, "cantGetAliases", str.getBuffer(),
                   u_wmsg_errorName(err));
            uprv_free(stds);
            return -1;
        } else {
            uint16_t a, s, t;

            /* Write all the aliases and their tags. */
            for (a = 0; a < num_aliases; ++a) {
                const char *alias = ucnv_getAlias(name, a, &err);

                if (U_FAILURE(err)) {
                    UnicodeString str(name, (int32_t)(uprv_strlen(name) + 1));
                    putchar('\t');
                    initMsg(pname);
                    u_wmsg(stderr, "cantGetAliases", str.getBuffer(),
                           u_wmsg_errorName(err));
                    uprv_free(stds);
                    return -1;
                }

                printf("%s", alias);

                /* Look (slowly, linear searching) for a tag. */
                if (canon) {
                    for (s = t = 0; s < num_stds; ++s) {
                        const char *standard =
                            ucnv_getStandardName(name, stds[s], &err);
                        if (U_SUCCESS(err) && standard) {
                            if (!strcmp(standard, alias)) {
                                if (!t) {
                                    printf(" {");
                                    t = 1;
                                }
                                printf(" %s", stds[s]);
                            }
                        } else if (U_FAILURE(err)) {
                            /* A failed tag lookup just means "no tag";
                               clear it so it cannot make the following
                               lookups fail as well. */
                            err = U_ZERO_ERROR;
                        }
                    }
                    if (t) {
                        printf(" }");
                    }
                }

                /* Move on. */
                if (a < num_aliases - 1) {
                    putchar(a || !canon ? ' ' : '\t');
                }
            }
        }

        /* Terminate this entry. */
        if (canon) {
            putchar('\n');
        } else if (i < num - 1) {
            putchar(' ');
        }
    }

    /* Free temporary data. */
    uprv_free(stds);

    /* Success. */
    return 0;
}
/* Print all available transliterators. If canon is non zero, print
   one transliterator per line; otherwise print them on a single line
   separated by spaces and terminated by a newline.

   Returns 0 on success. When ICU was built with
   UCONFIG_NO_TRANSLITERATION, prints an explanation and returns 1.

   IDs are fetched into a heap buffer that grows on demand; if memory
   cannot be obtained, a fixed-size stack buffer is used instead and
   over-long IDs are truncated with "...". */
static int printTransliterators(int canon)
{
#if UCONFIG_NO_TRANSLITERATION
    printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
    return 1;
#else
    int32_t numtrans = utrans_countAvailableIDs(), i;
    int buflen = 512;
    char *buf = (char *) uprv_malloc(buflen);
    char staticbuf[512];

    char sepchar = canon ? '\n' : ' ';

    if (!buf) {
        buf = staticbuf;
        buflen = sizeof(staticbuf);
    }

    for (i = 0; i < numtrans; ++i) {
        int32_t len = utrans_getAvailableID(i, buf, buflen);
        if (len >= buflen - 1) {
            if (buf != staticbuf) {
                char *grown;

                buflen <<= 1;
                /* "<=" rather than "<": the buffer must also hold the
                   terminating NUL. */
                if (buflen <= len) {
                    buflen = len + 64;
                }
                /* Keep the old pointer until the reallocation is known to
                   have succeeded, so that it can be released on failure. */
                grown = (char *) uprv_realloc(buf, buflen);
                if (grown) {
                    buf = grown;
                } else {
                    uprv_free(buf);
                    buf = staticbuf;
                    buflen = sizeof(staticbuf);
                }
            }
            utrans_getAvailableID(i, buf, buflen);
            if (len >= buflen) {
                uprv_strcpy(buf + buflen - 4, "...");   /* Truncate the name. */
            }
        }

        printf("%s", buf);
        if (i < numtrans - 1) {
            putchar(sepchar);
        }
    }

    /* Add a terminating newline if needed. */
    if (sepchar != '\n') {
        putchar('\n');
    }

    /* Free temporary data. */
    if (buf != staticbuf) {
        uprv_free(buf);
    }

    /* Success. */
    return 0;
#endif
}
/* Translate a position in the converted output back to a source offset.

   whereto indexes tooffsets (tosz entries); the value found there indexes
   fromoffsets (fromsz entries), whose value is the result. If either
   index falls outside its table, 0 is returned. */
static inline int32_t dataOffset(int32_t whereto,
                                 const int32_t *fromoffsets, int32_t fromsz,
                                 const int32_t *tooffsets, int32_t tosz) {
    int32_t unitIndex;

    /* First hop: output position -> index into the from-offsets table. */
    if (whereto < 0 || whereto >= tosz) {
        return 0;
    }
    unitIndex = tooffsets[whereto];

    /* Second hop: that index -> offset in the source. */
    if (unitIndex < 0 || unitIndex >= fromsz) {
        return 0;
    }
    return fromoffsets[unitIndex];
}
// Convert a file from one encoding to another.
//
// The input (infilestr, or stdin when it is null or "-") is read in blocks
// of bufsz bytes, converted to Unicode with a converter opened for
// fromcpage, optionally transliterated (translit is either a transliterator
// ID or a set of rules), converted to tocpage and written to outfile.
//
// toucallback/touctxt and fromucallback/fromuctxt are installed on the
// to-Unicode and from-Unicode converters respectively; fallback is passed
// to ucnv_setFallback() on the output converter. When verbose is non zero
// the input name is printed on stderr before converting. pname is the
// program name, used to initialize the message bundle before reporting.
//
// Returns TRUE on success, FALSE after reporting an error on stderr. All
// converters, buffers and the input file (unless it is stdin) are released
// before returning; outfile is left open for the caller.
static UBool convertFile(const char *pname,
                         const char *fromcpage,
                         UConverterToUCallback toucallback,
                         const void *touctxt,
                         const char *tocpage,
                         UConverterFromUCallback fromucallback,
                         const void *fromuctxt,
                         int fallback,
                         size_t bufsz,
                         const char *translit,
                         const char *infilestr,
                         FILE * outfile, int verbose)
{
    FILE *infile;
    UBool ret = TRUE;
    UConverter *convfrom = 0;
    UConverter *convto = 0;
    UErrorCode err = U_ZERO_ERROR;
    UBool flush;
    const char *cbufp;
    char *bufp;
    char *buf = 0;

    uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */

    const UChar *unibufbp;
    UChar *unibufp;
    UChar *unibuf = 0;
    int32_t *fromoffsets = 0, *tooffsets = 0;

    size_t rd, wr, tobufsz;

#if !UCONFIG_NO_TRANSLITERATION
    Transliterator *t = 0;      // Transliterator acting on Unicode data.
#endif
    UnicodeString u;            // String to do the transliteration.

    // Open the correct input file or connect to stdin for reading input

    if (infilestr != 0 && strcmp(infilestr, "-")) {
        infile = fopen(infilestr, "rb");
        if (infile == 0) {
            // Capture errno before any other call gets a chance to change it.
            const int openErrno = errno;
            UnicodeString str1(infilestr, "");
            str1.append((UChar32) 0);
            UnicodeString str2(strerror(openErrno), "");
            str2.append((UChar32) 0);
            initMsg(pname);
            u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
            return FALSE;
        }
    } else {
        infilestr = "-";
        infile = stdin;
#ifdef WIN32
        if (setmode(fileno(stdin), O_BINARY) == -1) {
            initMsg(pname);
            u_wmsg(stderr, "cantSetInBinMode");
            return FALSE;
        }
#endif
    }

    if (verbose) {
        fprintf(stderr, "%s:\n", infilestr);
    }

#if !UCONFIG_NO_TRANSLITERATION
    // Create transliterator as needed.

    if (translit != NULL && *translit) {
        UParseError parse;
        UnicodeString str(translit);

        /* Create from rules or by ID as needed. */

        parse.line = -1;

        if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<')) {
            t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
        } else {
            t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
        }

        if (U_FAILURE(err)) {
            str.append((UChar32) 0);
            initMsg(pname);

            if (parse.line >= 0) {
                UChar linebuf[20], offsetbuf[20];
                uprv_itou(linebuf, 20, parse.line, 10, 0);
                uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
                u_wmsg(stderr, "cantCreateTranslitParseErr", str.getBuffer(),
                       u_wmsg_errorName(err), linebuf, offsetbuf);
            } else {
                u_wmsg(stderr, "cantCreateTranslit", str.getBuffer(),
                       u_wmsg_errorName(err));
            }

            if (t) {
                delete t;
                t = 0;
            }
            goto error_exit;
        }
    }
#endif

    // Create codepage converter. If the codepage or its aliases weren't
    // available, it returns NULL and a failure code. We also set the
    // callbacks, and return errors in the same way.

    convfrom = ucnv_open(fromcpage, &err);
    if (U_FAILURE(err)) {
        UnicodeString str(fromcpage, (int32_t)(uprv_strlen(fromcpage) + 1));
        initMsg(pname);
        u_wmsg(stderr, "cantOpenFromCodeset", str.getBuffer(),
               u_wmsg_errorName(err));
        goto error_exit;
    }
    ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
    if (U_FAILURE(err)) {
        initMsg(pname);
        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
        goto error_exit;
    }

    convto = ucnv_open(tocpage, &err);
    if (U_FAILURE(err)) {
        UnicodeString str(tocpage, (int32_t)(uprv_strlen(tocpage) + 1));
        initMsg(pname);
        u_wmsg(stderr, "cantOpenToCodeset", str.getBuffer(),
               u_wmsg_errorName(err));
        goto error_exit;
    }
    ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
    if (U_FAILURE(err)) {
        initMsg(pname);
        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
        goto error_exit;
    }
    ucnv_setFallback(convto, fallback);

    // To ensure that the buffer always is of enough size, we
    // must take the worst case scenario, that is the character in
    // the codepage that uses the most bytes and multiply it against
    // the buffer size.

    // use bufsz+1 to allow for additional BOM/signature character (U+FEFF)
    tobufsz = (bufsz+1) * ucnv_getMaxCharSize(convto);

    buf = new char[tobufsz];
    unibuf = new UChar[bufsz];

    fromoffsets = new int32_t[bufsz];
    tooffsets = new int32_t[tobufsz];

    // OK, we can convert now.

    do {
        char willexit = 0;

        rd = fread(buf, 1, bufsz, infile);
        if (ferror(infile) != 0) {
            const int readErrno = errno;
            UnicodeString str(strerror(readErrno));
            str.append((UChar32) 0);
            initMsg(pname);
            u_wmsg(stderr, "cantRead", str.getBuffer());
            goto error_exit;
        }

        // Convert the read buffer into the new coding
        // After the call 'unibufp' will be placed on the last
        // character that was converted in the 'unibuf'.
        // Also the 'cbufp' is positioned on the last converted
        // character.
        // At the last conversion in the file, flush should be set to
        // true so that we get all characters converted
        //
        // The converter must be flushed at the end of conversion so
        // that characters on hold also will be written.

        unibufp = unibuf;
        cbufp = buf;
        flush = rd != bufsz;    // A short read means this is the last block.
        ucnv_toUnicode(convfrom, &unibufp, unibufp + bufsz, &cbufp,
                       cbufp + rd, fromoffsets, flush, &err);

        infoffset += (uint32_t)(cbufp - buf);

        if (U_FAILURE(err)) {
            char pos[32];
            sprintf(pos, "%u", (unsigned int)(infoffset - 1));
            UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
            initMsg(pname);
            u_wmsg(stderr, "problemCvtToU", str.getBuffer(), u_wmsg_errorName(err));
            willexit = 1;
            err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
        }

        // At the last conversion, the converted characters should be
        // equal to number of chars read.

        if (flush && !willexit && cbufp != (buf + rd)) {
            char pos[32];
            sprintf(pos, "%u", (unsigned int) infoffset);
            UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
            initMsg(pname);
            u_wmsg(stderr, "premEndInput", str.getBuffer());
            willexit = 1;
        }

        // Prepare to transliterate and convert. Transliterate if needed.

#if !UCONFIG_NO_TRANSLITERATION
        if (t) {
            u.setTo(unibuf, (int32_t)(unibufp - unibuf)); // Copy into string.
            t->transliterate(u);
        } else
#endif
        {
            u.setTo(unibuf, (int32_t)(unibufp - unibuf), (int32_t)(bufsz)); // Share the buffer.
        }

        int32_t ulen = u.length();

        // Convert the Unicode buffer into the destination codepage
        // Again 'bufp' will be placed on the last converted character
        // And 'unibufbp' will be placed on the last converted unicode character
        // At the last conversion flush should be set to true to ensure that
        // all characters left get converted

        const UChar *unibufu = unibufbp = u.getBuffer();

        do {
            int32_t len = ulen > (int32_t)bufsz ? (int32_t)bufsz : ulen;

            // The text may be longer than bufsz (transliteration can grow
            // it) and is then converted in several chunks. Only the last
            // chunk of the last block may flush the converter; flushing on
            // an earlier chunk would end the conversion state in the
            // middle of the text.
            const UBool chunkflush = (UBool)(flush && ulen <= (int32_t)bufsz);

            bufp = buf;
            unibufp = (UChar *) (unibufbp + len);

            ucnv_fromUnicode(convto, &bufp, bufp + tobufsz,
                             &unibufbp,
                             unibufp,
                             tooffsets, chunkflush, &err);

            if (U_FAILURE(err)) {
                const char *errtag;
                char pos[32];

                uint32_t erroffset =
                    dataOffset((int32_t)(bufp - buf - 1), fromoffsets, (int32_t)(bufsz), tooffsets, (int32_t)(tobufsz));
                int32_t ferroffset = (int32_t)(infoffset - (unibufp - unibufu) + erroffset);

                // When no sensible input offset can be computed, report
                // the position in the output instead.
                if ((int32_t) ferroffset < 0) {
                    ferroffset = (int32_t)(outfoffset + (bufp - buf));
                    errtag = "problemCvtFromUOut";
                } else {
                    errtag = "problemCvtFromU";
                }
                sprintf(pos, "%u", (unsigned int) ferroffset);
                UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
                initMsg(pname);
                u_wmsg(stderr, errtag, str.getBuffer(),
                       u_wmsg_errorName(err));
                willexit = 1;
            }

            // At the last conversion, the converted characters should be equal to number
            // of consumed characters.
            if (flush && !willexit && unibufbp != (unibufu + (size_t) (unibufp - unibufu))) {
                char pos[32];
                sprintf(pos, "%u", (unsigned int) infoffset);
                UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
                initMsg(pname);
                u_wmsg(stderr, "premEnd", str.getBuffer());
                willexit = 1;
            }

            // Finally, write the converted buffer to the output file

            rd = (size_t) (bufp - buf);
            outfoffset += (int32_t)(wr = fwrite(buf, 1, rd, outfile));
            if (wr != rd) {
                const int writeErrno = errno;
                UnicodeString str(strerror(writeErrno), "");
                str.append((UChar32) 0);    // getBuffer() must be terminated for u_wmsg().
                initMsg(pname);
                u_wmsg(stderr, "cantWrite", str.getBuffer());
                willexit = 1;
            }

            if (willexit) {
                goto error_exit;
            }
        } while ((ulen -= (int32_t)(bufsz)) > 0);
    } while (!flush);           // Stop when we have flushed the
                                // converters (this means that it's
                                // the end of output)

    goto normal_exit;

error_exit:
    ret = FALSE;

normal_exit:
    // Cleanup.

    if (convfrom) ucnv_close(convfrom);
    if (convto) ucnv_close(convto);

#if !UCONFIG_NO_TRANSLITERATION
    if (t) delete t;
#endif

    if (buf) delete[] buf;
    if (unibuf) delete[] unibuf;

    if (fromoffsets) delete[] fromoffsets;
    if (tooffsets) delete[] tooffsets;

    if (infile != stdin) {
        fclose(infile);
    }

    return ret;
}
static void usage(const char *pname, int ecode) {
const UChar *msg;
int32_t msgLen;
UErrorCode err = U_ZERO_ERROR;
2002-04-02 01:17:28 +00:00
FILE *fp = ecode ? stderr : stdout;
int res;
initMsg(pname);
msg =
ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
&msgLen, &err);
UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
UnicodeString mname(msg, msgLen + 1);
res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
if (!ecode) {
if (!res) {
2002-04-02 01:17:28 +00:00
fputc('\n', fp);
}
if (!u_wmsg(fp, "help")) {
/* Now dump callbacks and finish. */
int i, count =
sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
for (i = 0; i < count; ++i) {
fprintf(fp, " %s", transcode_callbacks[i].name);
2002-04-02 01:17:28 +00:00
}
fputc('\n', fp);
2002-04-02 01:17:28 +00:00
}
}
exit(ecode);
}
int main(int argc, char **argv)
{
FILE *outfile;
int ret = 0;
int seenf = 0;
size_t bufsz = DEFAULT_BUFSZ;
const char *fromcpage = 0;
const char *tocpage = 0;
const char *translit = 0;
const char *outfilestr = 0;
int fallback = 0;
UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
const void *fromuctxt = 0;
UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
const void *touctxt = 0;
char **iter;
char **end = argv + argc;
const char *pname;
int printConvs = 0, printCanon = 0;
const char *printName = 0;
int printTranslits = 0;
int verbose = 0;
UErrorCode status = U_ZERO_ERROR;
/* Initialize ICU */
u_init(&status);
if (U_FAILURE(status)) {
fprintf(stderr, "%s: can not initialize ICU. status = %s\n",
argv[0], u_errorName(status));
exit(1);
}
2002-03-22 09:50:02 +00:00
// Get and prettify pname.
pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
#ifdef WIN32
if (!pname) {
pname = uprv_strrchr(*argv, '/');
}
#endif
if (!pname) {
pname = *argv;
} else {
++pname;
2002-03-22 09:50:02 +00:00
}
// First, get the arguments from command-line
// to know the codepages to convert between
// XXX When you add to this loop, you need to add to the similar loop
// below.
for (iter = argv + 1; iter != end; iter++) {
// Check for from charset
if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
iter++;
if (iter != end)
fromcpage = *iter;
else
usage(pname, 1);
} else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
iter++;
if (iter != end)
tocpage = *iter;
else
usage(pname, 1);
} else if (strcmp("-x", *iter) == 0) {
iter++;
if (iter != end)
translit = *iter;
else
usage(pname, 1);
} else if (!strcmp("--fallback", *iter)) {
fallback = 1;
} else if (!strcmp("--no-fallback", *iter)) {
fallback = 0;
} else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
iter++;
if (iter != end) {
bufsz = atoi(*iter);
if ((int) bufsz <= 0) {
initMsg(pname);
UnicodeString str(*iter);
initMsg(pname);
u_wmsg(stderr, "badBlockSize", str.getBuffer());
return 3;
}
} else {
usage(pname, 1);
}
} else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
if (printTranslits) {
usage(pname, 1);
}
printConvs = 1;
} else if (strcmp("--default-code", *iter) == 0) {
if (printTranslits) {
usage(pname, 1);
}
printName = ucnv_getDefaultName();
} else if (strcmp("--list-code", *iter) == 0) {
if (printTranslits) {
usage(pname, 1);
}
iter++;
if (iter != end) {
UErrorCode e = U_ZERO_ERROR;
printName = ucnv_getAlias(*iter, 0, &e);
if (U_FAILURE(e) || !printName) {
UnicodeString str(*iter);
initMsg(pname);
u_wmsg(stderr, "noSuchCodeset", str.getBuffer());
return 2;
}
} else
usage(pname, 1);
} else if (strcmp("--canon", *iter) == 0) {
printCanon = 1;
} else if (strcmp("-L", *iter) == 0
|| !strcmp("--list-transliterators", *iter)) {
if (printConvs) {
usage(pname, 1);
}
printTranslits = 1;
} else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
|| !strcmp("--help", *iter)) {
usage(pname, 0);
} else if (!strcmp("-c", *iter)) {
fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
} else if (!strcmp("--to-callback", *iter)) {
iter++;
if (iter != end) {
const struct callback_ent *cbe = findCallback(*iter);
if (cbe) {
fromucallback = cbe->fromu;
fromuctxt = cbe->fromuctxt;
} else {
UnicodeString str(*iter);
initMsg(pname);
u_wmsg(stderr, "unknownCallback", str.getBuffer());
return 4;
}
} else {
usage(pname, 1);
}
} else if (!strcmp("--from-callback", *iter)) {
iter++;
if (iter != end) {
const struct callback_ent *cbe = findCallback(*iter);
if (cbe) {
toucallback = cbe->tou;
touctxt = cbe->touctxt;
} else {
UnicodeString str(*iter);
initMsg(pname);
u_wmsg(stderr, "unknownCallback", str.getBuffer());
return 4;
}
} else {
usage(pname, 1);
}
} else if (!strcmp("-i", *iter)) {
toucallback = UCNV_TO_U_CALLBACK_SKIP;
} else if (!strcmp("--callback", *iter)) {
iter++;
if (iter != end) {
const struct callback_ent *cbe = findCallback(*iter);
if (cbe) {
fromucallback = cbe->fromu;
fromuctxt = cbe->fromuctxt;
toucallback = cbe->tou;
touctxt = cbe->touctxt;
} else {
UnicodeString str(*iter);
initMsg(pname);
u_wmsg(stderr, "unknownCallback", str.getBuffer());
return 4;
}
} else {
usage(pname, 1);
}
} else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
verbose = 0;
} else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
verbose = 1;
} else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
printf("%s v2.0\n", pname);
return 0;
} else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
++iter;
if (iter != end && !outfilestr) {
outfilestr = *iter;
} else {
usage(pname, 1);
}
} else if (**iter == '-' && (*iter)[1]) {
usage(pname, 1);
}
}
if (printConvs || printName) {
return printConverters(pname, printName, printCanon) ? 2 : 0;
} else if (printTranslits) {
return printTransliterators(printCanon) ? 3 : 0;
}
if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
fromcpage = ucnv_getDefaultName();
}
if (!tocpage || !uprv_strcmp(tocpage, "-")) {
tocpage = ucnv_getDefaultName();
}
// Open the correct output file or connect to stdout for reading input
if (outfilestr != 0 && strcmp(outfilestr, "-")) {
outfile = fopen(outfilestr, "wb");
if (outfile == 0) {
UnicodeString str1(outfilestr, "");
UnicodeString str2(strerror(errno), "");
initMsg(pname);
u_wmsg(stderr, "cantCreateOutputF",
str1.getBuffer(), str2.getBuffer());
return 1;
}
} else {
outfilestr = "-";
outfile = stdout;
#ifdef WIN32
if (setmode(fileno(outfile), O_BINARY) == -1) {
u_wmsg(stderr, "cantSetOutBinMode");
exit(-1);
}
#endif
}
/* Loop again on the arguments to find all the input files, and
convert them. XXX Cheap and sloppy. */
for (iter = argv + 1; iter != end; iter++) {
if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
iter++;
} else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
iter++;
} else if (strcmp("-x", *iter) == 0) {
iter++;
} else if (!strcmp("--fallback", *iter)) {
;
} else if (!strcmp("--no-fallback", *iter)) {
;
} else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
iter++;
} else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
;
} else if (strcmp("--default-code", *iter) == 0) {
;
} else if (strcmp("--list-code", *iter) == 0) {
;
} else if (strcmp("--canon", *iter) == 0) {
;
} else if (strcmp("-L", *iter) == 0
|| !strcmp("--list-transliterators", *iter)) {
;
} else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
|| !strcmp("--help", *iter)) {
;
} else if (!strcmp("-c", *iter)) {
;
} else if (!strcmp("--to-callback", *iter)) {
iter++;
} else if (!strcmp("--from-callback", *iter)) {
iter++;
} else if (!strcmp("-i", *iter)) {
;
} else if (!strcmp("--callback", *iter)) {
iter++;
} else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
;
} else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
;
} else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
;
} else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
++iter;
} else {
seenf = 1;
if (!convertFile
(pname, fromcpage, toucallback, touctxt, tocpage,
fromucallback, fromuctxt, fallback, bufsz, translit, *iter,
outfile, verbose)) {
goto error_exit;
}
}
}
if (!seenf) {
if (!convertFile
(pname, fromcpage, toucallback, touctxt, tocpage,
fromucallback, fromuctxt, fallback, bufsz, translit, 0, outfile,
verbose)) {
goto error_exit;
}
}
goto normal_exit;
error_exit:
ret = 1;
normal_exit:
if (outfile != stdout)
fclose(outfile);
return ret;
}
/*
* Hey, Emacs, please set the following:
*
* Local Variables:
* indent-tabs-mode: nil
* End:
*
*/