scuffed-code/icu4c/source/extra/uconv/uconv.cpp

/*****************************************************************************
*
*   Copyright (C) 1999-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************/

/*
 * uconv(1): an iconv(1)-like converter using ICU.
 *
 * Original code by Jonas Utterström <jonas.utterstrom@vittran.norrnod.se>
 * contributed in 1999.
 *
 * Conversion to the C conversion API and many improvements by
 * Yves Arrouye <yves@realnames.com>, current maintainer.
 *
 * Markus Scherer maintainer from 2003.
 * See source code repository history for changes.
 */

#include <unicode/utypes.h>
#include <unicode/ucnv.h>
#include <unicode/uenum.h>
#include <unicode/unistr.h>
#include <unicode/translit.h>
#include <unicode/uset.h>
#include <unicode/uclean.h>

#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>

#include "cmemory.h"
#include "cstring.h"
#include "ustrfmt.h"

#include "unicode/uwmsg.h"

#if defined(WIN32) || defined(U_CYGWIN)
#include <io.h>
#include <fcntl.h>
#endif

#ifdef UCONVMSG_LINK
/* below from the README */
#include "unicode/utypes.h"
#include "unicode/udata.h"
U_CFUNC char uconvmsg_dat[];
#endif

#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

#define DEFAULT_BUFSZ   4096
#define UCONVMSG "uconvmsg"

static UResourceBundle *gBundle = 0;    /* Bundle containing messages. */

/*
 * Initialize the message bundle so that message strings can be fetched
 * by u_wmsg().
 *
 */

static void initMsg(const char *pname) {
    static int ps = 0;

    if (!ps) {
        char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
        UErrorCode err = U_ZERO_ERROR;

        ps = 1;

        /* Set up our static data - if any */
#ifdef UCONVMSG_LINK
        udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
        if (U_FAILURE(err)) {
          fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
                  pname, u_errorName(err));
          err = U_ZERO_ERROR; /* It may still fail */
        }
#endif

        /* Get messages. */
        gBundle = u_wmsg_setPath(UCONVMSG, &err);
        if (U_FAILURE(err)) {
            fprintf(stderr,
                    "%s: warning: couldn't open bundle %s: %s\n",
                    pname, UCONVMSG, u_errorName(err));
#ifdef UCONVMSG_LINK
            fprintf(stderr,
                    "%s: setAppData was called, internal data %s failed to load\n",
                        pname, UCONVMSG);
#endif
 
            err = U_ZERO_ERROR;
            /* that was try #1, try again with a path */
            uprv_strcpy(dataPath, u_getDataDirectory());
            uprv_strcat(dataPath, U_FILE_SEP_STRING);
            uprv_strcat(dataPath, UCONVMSG);

            gBundle = u_wmsg_setPath(dataPath, &err);
            if (U_FAILURE(err)) {
                fprintf(stderr,
                    "%s: warning: still couldn't open bundle %s: %s\n",
                    pname, dataPath, u_errorName(err));
                fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
            }
        }
    }
}

/* Mapping of callback names to the callbacks passed to the converter
   API. */

static struct callback_ent {
    const char *name;               /* Name looked up (ignoring case) by findCallback(). */
    UConverterFromUCallback fromu;  /* Callback for the from-Unicode direction. */
    const void *fromuctxt;          /* Context handed to fromu. */
    UConverterToUCallback tou;      /* Callback for the to-Unicode direction. */
    const void *touctxt;            /* Context handed to tou. */
} transcode_callbacks[] = {
    /* Plain callbacks, no context. */
    { "substitute",
      UCNV_FROM_U_CALLBACK_SUBSTITUTE, NULL,
      UCNV_TO_U_CALLBACK_SUBSTITUTE,   NULL },
    { "skip",
      UCNV_FROM_U_CALLBACK_SKIP, NULL,
      UCNV_TO_U_CALLBACK_SKIP,   NULL },
    { "stop",
      UCNV_FROM_U_CALLBACK_STOP, NULL,
      UCNV_TO_U_CALLBACK_STOP,   NULL },
    { "escape",
      UCNV_FROM_U_CALLBACK_ESCAPE, NULL,
      UCNV_TO_U_CALLBACK_ESCAPE,   NULL },

    /* Escape callback with an explicit escape flavour as context. */
    { "escape-icu",
      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
      UCNV_TO_U_CALLBACK_ESCAPE,   UCNV_ESCAPE_ICU },
    { "escape-java",
      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
      UCNV_TO_U_CALLBACK_ESCAPE,   UCNV_ESCAPE_JAVA },
    { "escape-c",
      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
      UCNV_TO_U_CALLBACK_ESCAPE,   UCNV_ESCAPE_C },
    /* "escape-xml" uses the same hexadecimal flavour as "escape-xml-hex". */
    { "escape-xml",
      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
      UCNV_TO_U_CALLBACK_ESCAPE,   UCNV_ESCAPE_XML_HEX },
    { "escape-xml-hex",
      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
      UCNV_TO_U_CALLBACK_ESCAPE,   UCNV_ESCAPE_XML_HEX },
    { "escape-xml-dec",
      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
      UCNV_TO_U_CALLBACK_ESCAPE,   UCNV_ESCAPE_XML_DEC },
    { "escape-unicode",
      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
      UCNV_TO_U_CALLBACK_ESCAPE,   UCNV_ESCAPE_UNICODE }
};

/* Return a pointer to a callback record given its name. */

static const struct callback_ent *findCallback(const char *name) {
    int i, count =
        sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);

    /* We'll do a linear search, there aren't many of them and bsearch()
       may not be that portable. */

    for (i = 0; i < count; ++i) {
        if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
            return &transcode_callbacks[i];
        }
    }

    return 0;
}

/* Print converter information. If lookfor is set, only that converter will
   be printed, otherwise all converters will be printed. If canon is non
   zero, tags and aliases for each converter are printed too, in the format
   expected for convrters.txt(5). */

static int printConverters(const char *pname, const char *lookfor,
    UBool canon)
{
    UErrorCode err = U_ZERO_ERROR;
    int32_t num;
    uint16_t num_stds;
    const char **stds;

    /* If there is a specified name, just handle that now. */

    if (lookfor) {
        if (!canon) {
            printf("%s\n", lookfor);
            return 0;
        } else {
        /*  Because we are printing a canonical name, we need the
            true converter name. We've done that already except for
            the default name (because we want to print the exact
            name one would get when calling ucnv_getDefaultName()
            in non-canon mode). But since we do not know at this
            point if we have the default name or something else, we
            need to normalize again to the canonical converter
            name. */

            const char *truename = ucnv_getAlias(lookfor, 0, &err);
            if (U_SUCCESS(err)) {
                lookfor = truename;
            } else {
                err = U_ZERO_ERROR;
            }
        }
    }

    /* Print converter names. We come here for one of two reasons: we
       are printing all the names (lookfor was null), or we have a
       single converter to print but in canon mode, hence we need to
       get to it in order to print everything. */

    num = ucnv_countAvailable();
    if (num <= 0) {
        initMsg(pname);
        u_wmsg(stderr, "cantGetNames");
        return -1;
    }
    if (lookfor) {
        num = 1;                /* We know where we want to be. */
    }

    num_stds = ucnv_countStandards();
    stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
    if (!stds) {
        u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
        return -1;
    } else {
        uint16_t s;

        if (canon) {
            printf("{ ");
        }
        for (s = 0; s < num_stds; ++s) {
            stds[s] = ucnv_getStandard(s, &err);
            if (canon) {
                printf("%s ", stds[s]);
            }
            if (U_FAILURE(err)) {
                u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
                return -1;
            }
        }
        if (canon) {
            puts("}");
        }
    }

    for (int32_t i = 0; i < num; i++) {
        const char *name;
        uint16_t num_aliases;

        /* Set the name either to what we are looking for, or
        to the current converter name. */

        if (lookfor) {
            name = lookfor;
        } else {
            name = ucnv_getAvailableName(i);
        }

        /* Get all the aliases associated to the name. */

        err = U_ZERO_ERROR;
        num_aliases = ucnv_countAliases(name, &err);
        if (U_FAILURE(err)) {
            printf("%s", name);

            UnicodeString str(name, "");
            putchar('\t');
            u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
                u_wmsg_errorName(err));
            return -1;
        } else {
            uint16_t a, s, t;

            /* Write all the aliases and their tags. */

            for (a = 0; a < num_aliases; ++a) {
                const char *alias = ucnv_getAlias(name, a, &err);

                if (U_FAILURE(err)) {
                    UnicodeString str(name, "");
                    putchar('\t');
                    u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
                        u_wmsg_errorName(err));
                    return -1;
                }

                /* Print the current alias so that it looks right. */
                printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
                                 alias,
                                 (canon ? "" : " "));

                /* Look (slowly, linear searching) for a tag. */

                if (canon) {
                    /* -1 to skip the last standard */
                    for (s = t = 0; s < num_stds-1; ++s) {
                        UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
                        if (U_SUCCESS(err)) {
                            /* List the standard tags */
                            const char *standardName;
                            UBool isFirst = TRUE;
                            UErrorCode enumError = U_ZERO_ERROR;
                            while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
                                /* See if this alias is supported by this standard. */
                                if (!strcmp(standardName, alias)) {
                                    if (!t) {
                                        printf(" {");
                                        t = 1;
                                    }
                                    /* Print a * after the default standard name */
                                    printf(" %s%s", stds[s], (isFirst ? "*" : ""));
                                }
                                isFirst = FALSE;
                            }
                        }
                    }
                    if (t) {
                        printf(" }");
                    }
                }
                /* Terminate this entry. */
                if (canon) {
                    puts("");
                }

                /* Move on. */
            }
            /* Terminate this entry. */
            if (!canon) {
                puts("");
            }
        }
    }

    /* Free temporary data. */

    uprv_free(stds);

    /* Success. */

    return 0;
}

/* Print all available transliterators. If canon is non zero, print
   one transliterator per line. */

static int printTransliterators(UBool canon)
{
#if UCONFIG_NO_TRANSLITERATION
    printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
    return 1;
#else
    int32_t numtrans = utrans_countAvailableIDs(), i;
    int buflen = 512;
    char *buf = (char *) uprv_malloc(buflen);
    char staticbuf[512];        /* Fallback when the heap is unavailable. */

    /* One ID per line in canon mode, otherwise all IDs on one line. */
    char sepchar = canon ? '\n' : ' ';

    if (!buf) {
        buf = staticbuf;
        buflen = sizeof(staticbuf);
    }

    for (i = 0; i < numtrans; ++i) {
        int32_t len = utrans_getAvailableID(i, buf, buflen);
        if (len >= buflen - 1) {
            /* The ID may not have fit: grow a heap buffer and fetch again. */
            if (buf != staticbuf) {
                int newlen = buflen << 1;
                if (newlen <= len) {
                    /* Doubling is not enough for the ID plus its terminator. */
                    newlen = len + 64;
                }

                /* Keep the old pointer until the reallocation is known to
                   have succeeded; on failure the old block is still ours
                   and must be released before falling back. */
                char *grown = (char *) uprv_realloc(buf, newlen);
                if (grown) {
                    buf = grown;
                    buflen = newlen;
                } else {
                    uprv_free(buf);
                    buf = staticbuf;
                    buflen = sizeof(staticbuf);
                }
            }
            utrans_getAvailableID(i, buf, buflen);
            if (len >= buflen) {
                uprv_strcpy(buf + buflen - 4, "..."); /* Truncate the name. */
            }
        }

        printf("%s", buf);
        if (i < numtrans - 1) {
            putchar(sepchar);
        }
    }

    /* Add a terminating newline if needed. */

    if (sepchar != '\n') {
        putchar('\n');
    }

    /* Free temporary data. */

    if (buf != staticbuf) {
        uprv_free(buf);
    }

    /* Success. */

    return 0;
#endif
}

// Code points that get special treatment while converting:
// the space, the paragraph-ending characters and the Unicode signature.
enum {
    uSP  = 0x0020,      // space
    uLF  = 0x000a,      // line feed
    uCR  = 0x000d,      // carriage return
    uNL  = 0x0085,      // newline
    uLS  = 0x2028,      // line separator
    uPS  = 0x2029,      // paragraph separator
    uSig = 0xfeff       // signature/BOM character
};

static inline int32_t
getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
    // find one of
    // CR, LF, CRLF, NL, LS, PS
    // for paragraph ends (see UAX #13/Unicode 4)
    // and include it in the chunk
    // all of these characters are on the BMP
    // do not include FF or VT in case they are part of a paragraph
    // (important for bidi contexts)
    static const UChar paraEnds[] = {
        0xd, 0xa, 0x85, 0x2028, 0x2029
    };
    enum {
        iCR, iLF, iNL, iLS, iPS, iCount
    };

    // first, see if there is a CRLF split between prev and s
    if (prev.endsWith(paraEnds + iCR, 1)) {
        if (s.startsWith(paraEnds + iLF, 1)) {
            return 1; // split CRLF, include the LF
        } else if (!s.isEmpty()) {
            return 0; // complete the last chunk
        } else {
            return -1; // wait for actual further contents to arrive
        }
    }

    const UChar *u = s.getBuffer(), *limit = u + s.length();
    UChar c;

    while (u < limit) {
        c = *u++;
        if (
            ((c < uSP) && (c == uCR || c == uLF)) ||
            (c == uNL) ||
            ((c & uLS) == uLS)
        ) {
            if (c == uCR) {
                // check for CRLF
                if (u == limit) {
                    return -1; // LF may be in the next chunk
                } else if (*u == uLF) {
                    ++u; // include the LF in this chunk
                }
            }
            return (int32_t)(u - s.getBuffer());
        }
    }

    return -1; // continue collecting the chunk
}

// How a converter deals with the U+FEFF Unicode signature character (BOM);
// these are the possible results of cnvSigType().
enum {
    CNV_NO_FEFF   = 0,  // cannot convert U+FEFF
    CNV_WITH_FEFF = 1,  // can convert U+FEFF
    CNV_ADDS_FEFF = 2   // automatically adds/detects U+FEFF
};

// Map the low four bits of n to the code unit of the matching lowercase
// hexadecimal digit (U+0030..U+0039 for 0..9, U+0061..U+0066 for 10..15).
// The upper four bits of n are ignored.
static inline UChar
nibbleToHex(uint8_t n) {
    const uint8_t digit = (uint8_t)(n & 0xf);

    if (digit > 9) {
        // 10..15 -> 'a'..'f'
        return (UChar)((0x61 - 10) + digit);
    }
    // 0..9 -> '0'..'9'
    return (UChar)(0x30 + digit);
}

// check the converter's Unicode signature properties;
// the fromUnicode side of the converter must be in its initial state
// and will be reset again if it was used
static int32_t
cnvSigType(UConverter *cnv) {
    UErrorCode status = U_ZERO_ERROR;

    // Step 1: is U+FEFF in the converter's roundtrip set at all?
    USet *roundtrip = uset_open(1, 0);
    ucnv_getUnicodeSet(cnv, roundtrip, UCNV_ROUNDTRIP_SET, &status);
    const UBool convertsSig =
        (UBool)(U_SUCCESS(status) && uset_contains(roundtrip, uSig));
    uset_close(roundtrip);

    if (!convertsSig) {
        // an error occurred or U+FEFF cannot be converted
        return CNV_NO_FEFF;
    }

    // Step 2: convert a lone "a" and look at the bytes that come out -
    // if they start with a Unicode signature, the converter emits one
    // on its own.
    const UChar sample[1] = { 0x61 }; // "a"
    const UChar *src = sample;

    char bytes[20];
    char *dst = bytes;

    status = U_ZERO_ERROR;
    ucnv_fromUnicode(cnv,
        &dst, bytes + sizeof(bytes),
        &src, sample + 1,
        NULL, TRUE, &status);

    // leave the fromUnicode side in its initial state again
    ucnv_resetFromUnicode(cnv);

    const char *sigName =
        ucnv_detectUnicodeSignature(bytes, (int32_t)(dst - bytes), NULL, &status);
    if (sigName != NULL && U_SUCCESS(status)) {
        return CNV_ADDS_FEFF;
    }

    return CNV_WITH_FEFF;
}

// Holds the buffers and options used for converting one or more files
// from one encoding to another.
class ConvertFile {
public:
    ConvertFile() :
        buf(NULL), outbuf(NULL), fromoffsets(NULL),
        bufsz(0), signature(0) {}

    // Allocate the conversion buffers for bufferSize bytes of input.
    // May be called again to change the size; the buffers from the
    // previous call are released first.
    void
    setBufferSize(size_t bufferSize) {
        // drop what an earlier call allocated so that it does not leak
        // (deleting NULL pointers is harmless)
        delete [] buf;
        delete [] fromoffsets;
        buf = outbuf = NULL;
        fromoffsets = NULL;

        bufsz = bufferSize;

        // one allocation, split in halves: input bytes, then output bytes
        buf = new char[2 * bufsz];
        outbuf = buf + bufsz;

        // +1 for an added U+FEFF in the intermediate Unicode buffer
        fromoffsets = new int32_t[bufsz + 1];
    }

    ~ConvertFile() {
        // outbuf points into buf and must not be deleted separately
        delete [] buf;
        delete [] fromoffsets;
    }

    // Convert one input file (infilestr; NULL or "-" selects stdin) from
    // fromcpage to tocpage and write the result to outfile.
    UBool convertFile(const char *pname,
                      const char *fromcpage,
                      UConverterToUCallback toucallback,
                      const void *touctxt,
                      const char *tocpage,
                      UConverterFromUCallback fromucallback,
                      const void *fromuctxt,
                      UBool fallback,
                      const char *translit,
                      const char *infilestr,
                      FILE * outfile, int verbose);
private:
    friend int main(int argc, char **argv);

    char *buf, *outbuf;     // input byte buffer and output byte buffer (second half of buf)
    int32_t *fromoffsets;   // conversion offsets, parallel to the intermediate UChars

    size_t bufsz;           // size of each of buf's halves, in bytes
    int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
};

// Convert a file from one encoding to another
UBool
ConvertFile::convertFile(const char *pname,
                         const char *fromcpage,
                         UConverterToUCallback toucallback,
                         const void *touctxt,
                         const char *tocpage,
                         UConverterFromUCallback fromucallback,
                         const void *fromuctxt,
                         UBool fallback,
                         const char *translit,
                         const char *infilestr,
                         FILE * outfile, int verbose)
{
    FILE *infile;
    UBool ret = TRUE;
    UConverter *convfrom = 0;
    UConverter *convto = 0;
    UErrorCode err = U_ZERO_ERROR;
    UBool flush;
    const char *cbufp, *prevbufp;
    char *bufp;

    uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */

    const UChar *unibuf, *unibufbp;
    UChar *unibufp;

    size_t rd, wr;

#if !UCONFIG_NO_TRANSLITERATION
    Transliterator *t = 0;      // Transliterator acting on Unicode data.
    UnicodeString chunk;        // One chunk of the text being collected for transformation.
#endif
    UnicodeString u;            // String to do the transliteration.
    int32_t ulen;

    // use conversion offsets for error messages
    // unless a transliterator is used -
    // a text transformation will reorder characters in unpredictable ways
    UBool useOffsets = TRUE;

    // Open the correct input file or connect to stdin for reading input

    if (infilestr != 0 && strcmp(infilestr, "-")) {
        infile = fopen(infilestr, "rb");
        if (infile == 0) {
            UnicodeString str1(infilestr, "");
            str1.append((UChar32) 0);
            UnicodeString str2(strerror(errno), "");
            str2.append((UChar32) 0);
            initMsg(pname);
            u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
            return FALSE;
        }
    } else {
        infilestr = "-";
        infile = stdin;
#if defined(WIN32) || defined(U_CYGWIN)
        if (setmode(fileno(stdin), O_BINARY) == -1) {
            initMsg(pname);
            u_wmsg(stderr, "cantSetInBinMode");
            return FALSE;
        }
#endif
    }

    if (verbose) {
        fprintf(stderr, "%s:\n", infilestr);
    }

#if !UCONFIG_NO_TRANSLITERATION
    // Create transliterator as needed.

    if (translit != NULL && *translit) {
        UParseError parse;
        UnicodeString str(translit), pestr;

        /* Create from rules or by ID as needed. */

        parse.line = -1;

        if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
            t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
        } else {
            t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
        }

        if (U_FAILURE(err)) {
            str.append((UChar32) 0);
            initMsg(pname);

            if (parse.line >= 0) {
                UChar linebuf[20], offsetbuf[20];
                uprv_itou(linebuf, 20, parse.line, 10, 0);
                uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
                u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
                    u_wmsg_errorName(err), linebuf, offsetbuf);
            } else {
                u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
                    u_wmsg_errorName(err));
            }

            if (t) {
                delete t;
                t = 0;
            }
            goto error_exit;
        }

        useOffsets = FALSE;
    }
#endif

    // Create codepage converter. If the codepage or its aliases weren't
    // available, it returns NULL and a failure code. We also set the
    // callbacks, and return errors in the same way.

    convfrom = ucnv_open(fromcpage, &err);
    if (U_FAILURE(err)) {
        UnicodeString str(fromcpage, "");
        initMsg(pname);
        u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
            u_wmsg_errorName(err));
        goto error_exit;
    }
    ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
    if (U_FAILURE(err)) {
        initMsg(pname);
        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
        goto error_exit;
    }

    convto = ucnv_open(tocpage, &err);
    if (U_FAILURE(err)) {
        UnicodeString str(tocpage, "");
        initMsg(pname);
        u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
            u_wmsg_errorName(err));
        goto error_exit;
    }
    ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
    if (U_FAILURE(err)) {
        initMsg(pname);
        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
        goto error_exit;
    }
    ucnv_setFallback(convto, fallback);

    UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
    int8_t sig;

    // OK, we can convert now.
    sig = signature;
    rd = 0;

    do {
        willexit = FALSE;

        // input file offset at the beginning of the next buffer
        infoffset += rd;

        rd = fread(buf, 1, bufsz, infile);
        if (ferror(infile) != 0) {
            UnicodeString str(strerror(errno));
            initMsg(pname);
            u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
            goto error_exit;
        }

        // Convert the read buffer into the new encoding via Unicode.
        // After the call 'unibufp' will be placed behind the last
        // character that was converted in the 'unibuf'.
        // Also the 'cbufp' is positioned behind the last converted
        // character.
        // At the last conversion in the file, flush should be set to
        // true so that we get all characters converted.
        //
        // The converter must be flushed at the end of conversion so
        // that characters on hold also will be written.

        cbufp = buf;
        flush = (UBool)(rd != bufsz);

        // convert until the input is consumed
        do {
            // remember the start of the current byte-to-Unicode conversion
            prevbufp = cbufp;

            unibuf = unibufp = u.getBuffer((int32_t)bufsz);

            // Use bufsz instead of u.getCapacity() for the targetLimit
            // so that we don't overflow fromoffsets[].
            ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
                buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);

            ulen = (int32_t)(unibufp - unibuf);
            u.releaseBuffer(ulen);

            // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
            // converting all of the input bytes.
            // It works like this because ucnv_toUnicode() returns only under the
            // following conditions:
            // - an error occurred during conversion (an error code is set)
            // - the target buffer is filled (the error code indicates an overflow)
            // - the source is consumed
            // That is, if the error code does not indicate a failure,
            // not even an overflow, then the source must be consumed entirely.
            fromSawEndOfBytes = (UBool)U_SUCCESS(err);

            if (err == U_BUFFER_OVERFLOW_ERROR) {
                err = U_ZERO_ERROR;
            } else if (U_FAILURE(err)) {
                char pos[32], errorBytes[32];
                int8_t i, length, errorLength;

                UErrorCode localError = U_ZERO_ERROR;
                errorLength = (int8_t)sizeof(errorBytes);
                ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
                if (U_FAILURE(localError) || errorLength == 0) {
                    errorLength = 1;
                }

                // print the input file offset of the start of the error bytes:
                // input file offset of the current byte buffer +
                // length of the just consumed bytes -
                // length of the error bytes
                length =
                    (int8_t)sprintf(pos, "%d",
                        (int)(infoffset + (cbufp - buf) - errorLength));

                // output the bytes that caused the error
                UnicodeString str;
                for (i = 0; i < errorLength; ++i) {
                    if (i > 0) {
                        str.append((UChar)uSP);
                    }
                    str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
                    str.append(nibbleToHex((uint8_t)errorBytes[i]));
                }

                initMsg(pname);
                u_wmsg(stderr, "problemCvtToU",
                        UnicodeString(pos, length, "").getTerminatedBuffer(),
                        str.getTerminatedBuffer(),
                        u_wmsg_errorName(err));

                willexit = TRUE;
                err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
            }

            // Replaced a check for whether the input was consumed by
            // looping until it is; message key "premEndInput" now obsolete.

            if (ulen == 0) {
                continue;
            }

            // remove a U+FEFF Unicode signature character if requested
            if (sig < 0) {
                if (u.charAt(0) == uSig) {
                    u.remove(0, 1);

                    // account for the removed UChar and offset
                    --ulen;

                    if (useOffsets) {
                        // remove an offset from fromoffsets[] as well
                        // to keep the array parallel with the UChars
                        memmove(fromoffsets, fromoffsets + 1, ulen * 4);
                    }

                }
                sig = 0;
            }

#if !UCONFIG_NO_TRANSLITERATION
            // Transliterate/transform if needed.

            // For transformation, we use chunking code -
            // collect Unicode input until, for example, an end-of-line,
            // then transform and output-convert that and continue collecting.
            // This makes the transformation result independent of the buffer size
            // while avoiding the slower keyboard mode.
            // The end-of-chunk characters are completely included in the
            // transformed string in case they are to be transformed themselves.
            if (t != NULL) {
                UnicodeString out;
                int32_t chunkLimit;

                do {
                    chunkLimit = getChunkLimit(chunk, u);
                    if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
                        // use all of the rest at the end of the text
                        chunkLimit = u.length();
                    }
                    if (chunkLimit >= 0) {
                        // complete the chunk and transform it
                        chunk.append(u, 0, chunkLimit);
                        u.remove(0, chunkLimit);
                        t->transliterate(chunk);

                        // append the transformation result to the result and empty the chunk
                        out.append(chunk);
                        chunk.remove();
                    } else {
                        // continue collecting the chunk
                        chunk.append(u);
                        break;
                    }
                } while (!u.isEmpty());

                u = out;
                ulen = u.length();
            }
#endif

            // add a U+FEFF Unicode signature character if requested
            // and possible/necessary
            if (sig > 0) {
                if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
                    u.insert(0, (UChar)uSig);

                    if (useOffsets) {
                        // insert a pseudo-offset into fromoffsets[] as well
                        // to keep the array parallel with the UChars
                        memmove(fromoffsets + 1, fromoffsets, ulen * 4);
                        fromoffsets[0] = -1;
                    }

                    // account for the additional UChar and offset
                    ++ulen;
                }
                sig = 0;
            }

            // Convert the Unicode buffer into the destination codepage
            // Again 'bufp' will be placed behind the last converted character
            // And 'unibufp' will be placed behind the last converted unicode character
            // At the last conversion flush should be set to true to ensure that
            // all characters left get converted

            unibuf = unibufbp = u.getBuffer();

            do {
                bufp = outbuf;

                // Use fromSawEndOfBytes in addition to the flush flag -
                // it indicates whether the intermediate Unicode string
                // contains the very last UChars for the very last input bytes.
                ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
                                 &unibufbp,
                                 unibuf + ulen,
                                 NULL, (UBool)(flush && fromSawEndOfBytes), &err);

                // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
                // converting all of the intermediate UChars.
                // See comment for fromSawEndOfBytes.
                toSawEndOfUnicode = (UBool)U_SUCCESS(err);

                if (err == U_BUFFER_OVERFLOW_ERROR) {
                    err = U_ZERO_ERROR;
                } else if (U_FAILURE(err)) {
                    UChar errorUChars[4];
                    const char *errtag;
                    char pos[32];
                    UChar32 c;
                    int8_t i, length, errorLength;

                    UErrorCode localError = U_ZERO_ERROR;
                    errorLength = (int8_t)LENGTHOF(errorUChars);
                    ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
                    if (U_FAILURE(localError) || errorLength == 0) {
                        // need at least 1 so that we don't access beyond the length of fromoffsets[]
                        errorLength = 1;
                    }

                    int32_t ferroffset;

                    if (useOffsets) {
                        // Unicode buffer offset of the start of the error UChars
                        ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
                        if (ferroffset < 0) {
                            // approximation - the character started in the previous Unicode buffer
                            ferroffset = 0;
                        }

                        // get the corresponding byte offset out of fromoffsets[]
                        // go back if the offset is not known for some of the UChars
                        int32_t fromoffset;
                        do {
                            fromoffset = fromoffsets[ferroffset];
                        } while (fromoffset < 0 && --ferroffset >= 0);

                        // total input file offset =
                        // input file offset of the current byte buffer +
                        // byte buffer offset of where the current Unicode buffer is converted from +
                        // fromoffsets[Unicode offset]
                        ferroffset = infoffset + (prevbufp - buf) + fromoffset;
                        errtag = "problemCvtFromU";
                    } else {
                        // Do not use fromoffsets if (t != NULL) because the Unicode text may
                        // be different from what the offsets refer to.

                        // output file offset
                        ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
                        errtag = "problemCvtFromUOut";
                    }

                    length = (int8_t)sprintf(pos, "%u", ferroffset);

                    // output the code points that caused the error
                    UnicodeString str;
                    for (i = 0; i < errorLength;) {
                        if (i > 0) {
                            str.append((UChar)uSP);
                        }
                        U16_NEXT(errorUChars, i, errorLength, c);
                        if (c >= 0x100000) {
                            str.append(nibbleToHex((uint8_t)(c >> 20)));
                        }
                        if (c >= 0x10000) {
                            str.append(nibbleToHex((uint8_t)(c >> 16)));
                        }
                        str.append(nibbleToHex((uint8_t)(c >> 12)));
                        str.append(nibbleToHex((uint8_t)(c >> 8)));
                        str.append(nibbleToHex((uint8_t)(c >> 4)));
                        str.append(nibbleToHex((uint8_t)c));
                    }

                    initMsg(pname);
                    u_wmsg(stderr, errtag,
                            UnicodeString(pos, length, "").getTerminatedBuffer(),
                            str.getTerminatedBuffer(),
                           u_wmsg_errorName(err));
                    u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());

                    willexit = TRUE;
                    err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
                }

                // Replaced a check for whether the intermediate Unicode characters were all consumed by
                // looping until they are; message key "premEnd" now obsolete.

                // Finally, write the converted buffer to the output file
                size_t outlen = (size_t) (bufp - outbuf);
                outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
                if (wr != outlen) {
                    UnicodeString str(strerror(errno));
                    initMsg(pname);
                    u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
                    willexit = TRUE;
                }

                if (willexit) {
                    goto error_exit;
                }
            } while (!toSawEndOfUnicode);
        } while (!fromSawEndOfBytes);
    } while (!flush);           // Stop when we have flushed the
                                // converters (this means that it's
                                // the end of output)

    goto normal_exit;

error_exit:
    ret = FALSE;

normal_exit:
    // Cleanup.

    ucnv_close(convfrom);
    ucnv_close(convto);

#if !UCONFIG_NO_TRANSLITERATION
    delete t;
#endif

    if (infile != stdin) {
        fclose(infile);
    }

    return ret;
}

static void usage(const char *pname, int ecode) {
    const UChar *msg;
    int32_t msgLen;
    UErrorCode err = U_ZERO_ERROR;
    FILE *fp = ecode ? stderr : stdout;
    int res;

    initMsg(pname);
    msg =
        ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
                            &msgLen, &err);
    UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
    UnicodeString mname(msg, msgLen + 1);

    res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
    if (!ecode) {
        if (!res) {
            fputc('\n', fp);
        }
        if (!u_wmsg(fp, "help")) {
            /* Now dump callbacks and finish. */

            int i, count =
                sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
            for (i = 0; i < count; ++i) {
                fprintf(fp, " %s", transcode_callbacks[i].name);
            }
            fputc('\n', fp);
        }
    }

    exit(ecode);
}

extern int
main(int argc, char **argv)
{
    FILE *outfile;
    int ret = 0;

    size_t bufsz = DEFAULT_BUFSZ;

    const char *fromcpage = 0;
    const char *tocpage = 0;
    const char *translit = 0;
    const char *outfilestr = 0;
    UBool fallback = FALSE;

    UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
    const void *fromuctxt = 0;
    UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
    const void *touctxt = 0;

    char **iter, **remainArgv, **remainArgvLimit;
    char **end = argv + argc;

    const char *pname;

    UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
    const char *printName = 0;

    UBool verbose = FALSE;
    UErrorCode status = U_ZERO_ERROR;

    ConvertFile cf;

    /* Initialize ICU */
    u_init(&status);
    if (U_FAILURE(status)) {
        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
            argv[0], u_errorName(status));
        exit(1);
    }

    // Get and prettify pname.
    pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
#ifdef WIN32
    if (!pname) {
        pname = uprv_strrchr(*argv, '/');
    }
#endif
    if (!pname) {
        pname = *argv;
    } else {
        ++pname;
    }

    // First, get the arguments from command-line
    // to know the codepages to convert between

    remainArgv = remainArgvLimit = argv + 1;
    for (iter = argv + 1; iter != end; iter++) {
        // Check for from charset
        if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
            iter++;
            if (iter != end)
                fromcpage = *iter;
            else
                usage(pname, 1);
        } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
            iter++;
            if (iter != end)
                tocpage = *iter;
            else
                usage(pname, 1);
        } else if (strcmp("-x", *iter) == 0) {
            iter++;
            if (iter != end)
                translit = *iter;
            else
                usage(pname, 1);
        } else if (!strcmp("--fallback", *iter)) {
            fallback = TRUE;
        } else if (!strcmp("--no-fallback", *iter)) {
            fallback = FALSE;
        } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
            iter++;
            if (iter != end) {
                bufsz = atoi(*iter);
                if ((int) bufsz <= 0) {
                    initMsg(pname);
                    UnicodeString str(*iter);
                    initMsg(pname);
                    u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
                    return 3;
                }
            } else {
                usage(pname, 1);
            }
        } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
            if (printTranslits) {
                usage(pname, 1);
            }
            printConvs = TRUE;
        } else if (strcmp("--default-code", *iter) == 0) {
            if (printTranslits) {
                usage(pname, 1);
            }
            printName = ucnv_getDefaultName();
        } else if (strcmp("--list-code", *iter) == 0) {
            if (printTranslits) {
                usage(pname, 1);
            }

            iter++;
            if (iter != end) {
                UErrorCode e = U_ZERO_ERROR;
                printName = ucnv_getAlias(*iter, 0, &e);
                if (U_FAILURE(e) || !printName) {
                    UnicodeString str(*iter);
                    initMsg(pname);
                    u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
                    return 2;
                }
            } else
                usage(pname, 1);
        } else if (strcmp("--canon", *iter) == 0) {
            printCanon = TRUE;
        } else if (strcmp("-L", *iter) == 0
            || !strcmp("--list-transliterators", *iter)) {
            if (printConvs) {
                usage(pname, 1);
            }
            printTranslits = TRUE;
        } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
            || !strcmp("--help", *iter)) {
            usage(pname, 0);
        } else if (!strcmp("-c", *iter)) {
            fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
        } else if (!strcmp("--to-callback", *iter)) {
            iter++;
            if (iter != end) {
                const struct callback_ent *cbe = findCallback(*iter);
                if (cbe) {
                    fromucallback = cbe->fromu;
                    fromuctxt = cbe->fromuctxt;
                } else {
                    UnicodeString str(*iter);
                    initMsg(pname);
                    u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
                    return 4;
                }
            } else {
                usage(pname, 1);
            }
        } else if (!strcmp("--from-callback", *iter)) {
            iter++;
            if (iter != end) {
                const struct callback_ent *cbe = findCallback(*iter);
                if (cbe) {
                    toucallback = cbe->tou;
                    touctxt = cbe->touctxt;
                } else {
                    UnicodeString str(*iter);
                    initMsg(pname);
                    u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
                    return 4;
                }
            } else {
                usage(pname, 1);
            }
        } else if (!strcmp("-i", *iter)) {
            toucallback = UCNV_TO_U_CALLBACK_SKIP;
        } else if (!strcmp("--callback", *iter)) {
            iter++;
            if (iter != end) {
                const struct callback_ent *cbe = findCallback(*iter);
                if (cbe) {
                    fromucallback = cbe->fromu;
                    fromuctxt = cbe->fromuctxt;
                    toucallback = cbe->tou;
                    touctxt = cbe->touctxt;
                } else {
                    UnicodeString str(*iter);
                    initMsg(pname);
                    u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
                    return 4;
                }
            } else {
                usage(pname, 1);
            }
        } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
            verbose = FALSE;
        } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
            verbose = TRUE;
        } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
            printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
            return 0;
        } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
            ++iter;
            if (iter != end && !outfilestr) {
                outfilestr = *iter;
            } else {
                usage(pname, 1);
            }
        } else if (0 == strcmp("--add-signature", *iter)) {
            cf.signature = 1;
        } else if (0 == strcmp("--remove-signature", *iter)) {
            cf.signature = -1;
        } else if (**iter == '-' && (*iter)[1]) {
            usage(pname, 1);
        } else {
            // move a non-option up in argv[]
            *remainArgvLimit++ = *iter;
        }
    }

    if (printConvs || printName) {
        return printConverters(pname, printName, printCanon) ? 2 : 0;
    } else if (printTranslits) {
        return printTransliterators(printCanon) ? 3 : 0;
    }

    if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
        fromcpage = ucnv_getDefaultName();
    }
    if (!tocpage || !uprv_strcmp(tocpage, "-")) {
        tocpage = ucnv_getDefaultName();
    }

    // Open the correct output file or connect to stdout for reading input
    if (outfilestr != 0 && strcmp(outfilestr, "-")) {
        outfile = fopen(outfilestr, "wb");
        if (outfile == 0) {
            UnicodeString str1(outfilestr, "");
            UnicodeString str2(strerror(errno), "");
            initMsg(pname);
            u_wmsg(stderr, "cantCreateOutputF",
                str1.getBuffer(), str2.getBuffer());
            return 1;
        }
    } else {
        outfilestr = "-";
        outfile = stdout;
#if defined(WIN32) || defined(U_CYGWIN)
        if (setmode(fileno(outfile), O_BINARY) == -1) {
            u_wmsg(stderr, "cantSetOutBinMode");
            exit(-1);
        }
#endif
    }

    /* Loop again on the arguments to find all the input files, and
    convert them. */

    cf.setBufferSize(bufsz);

    if(remainArgv < remainArgvLimit) {
        for (iter = remainArgv; iter != remainArgvLimit; iter++) {
            if (!cf.convertFile(
                    pname, fromcpage, toucallback, touctxt, tocpage,
                    fromucallback, fromuctxt, fallback, translit, *iter,
                    outfile, verbose)
            ) {
                goto error_exit;
            }
        }
    } else {
        if (!cf.convertFile(
                pname, fromcpage, toucallback, touctxt, tocpage,
                fromucallback, fromuctxt, fallback, translit, 0,
                outfile, verbose)
        ) {
            goto error_exit;
        }
    }

    goto normal_exit;
error_exit:
    ret = 1;
normal_exit:

    if (outfile != stdout) {
        fclose(outfile);
    }

    return ret;
}


/*
 * Hey, Emacs, please set the following:
 *
 * Local Variables:
 * indent-tabs-mode: nil
 * End:
 *
 */
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								/*****************************************************************************
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								*
-												ICU-2959 update copyright dates to include 2003

X-SVN-Rev: 12253
											
										
										
											2003-06-03 20:58:22 +00:00
+								*   Copyright (C) 1999-2003, International Business Machines
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								*   Corporation and others.  All Rights Reserved.
 								*
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								******************************************************************************/
 								/*
 								 * uconv(1): an iconv(1)-like converter using ICU.
 								 *
 								 * Original code by Jonas Utterstr<EFBFBD>m <jonas.utterstrom@vittran.norrnod.se>
 								 * contributed in 1999.
 								 *
 								 * Conversion to the C conversion API and many improvements by
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								 * Yves Arrouye <yves@realnames.com>, current maintainer.
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								 *
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								 * Markus Scherer maintainer from 2003.
 								 * See source code repository history for changes.
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								 */
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-1568 use static mode for uconv by default

X-SVN-Rev: 7848
											
										
										
											2002-03-01 22:44:21 +00:00
+								#include <unicode/utypes.h>
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								#include <unicode/ucnv.h>
-												ICU-3230 Fix uconv -l --canon

X-SVN-Rev: 12999
											
										
										
											2003-09-01 06:56:56 +00:00
+								#include <unicode/uenum.h>
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								#include <unicode/unistr.h>
 								#include <unicode/translit.h>
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								#include <unicode/uset.h>
-												ICU-3014 Add explicit u_init() calls to tools and tests in anticipation of static init changes

X-SVN-Rev: 12826
											
										
										
											2003-08-14 21:34:54 +00:00
+								#include <unicode/uclean.h>
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-1887 Try to fix the name mangling problems with the Intel compiler by including utypes.h before the system headers.

X-SVN-Rev: 9124
											
										
										
											2002-07-12 21:42:24 +00:00
+								#include <stdio.h>
 								#include <errno.h>
 								#include <string.h>
 								#include <stdlib.h>
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								#include "cmemory.h"
-												ICU-1220 support IDs and compound IDs, but also complex rules in the -x
argument.

X-SVN-Rev: 7688
											
										
										
											2002-02-16 07:43:26 +00:00
+								#include "cstring.h"
-												ICU-1220 error reporting for rules parsing errors.

X-SVN-Rev: 7690
											
										
										
											2002-02-16 08:47:31 +00:00
+								#include "ustrfmt.h"
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
 								#include "unicode/uwmsg.h"
-												ICU-3389 Fix the case when newline translation is or is not used.

X-SVN-Rev: 14751
											
										
										
											2004-03-25 00:36:30 +00:00
+								#if defined(WIN32) || defined(U_CYGWIN)
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								#include <io.h>
 								#include <fcntl.h>
 								#endif
-												ICU-1868 AS/400 changes (allow uconv to be built as static, dll, or common)

X-SVN-Rev: 8808
											
										
										
											2002-06-07 17:08:34 +00:00
+								#ifdef UCONVMSG_LINK
-												ICU-1568 static mode update

X-SVN-Rev: 8040
											
										
										
											2002-03-15 02:02:57 +00:00
+								/* below from the README */
 								#include "unicode/utypes.h"
 								#include "unicode/udata.h"
 								U_CFUNC char uconvmsg_dat[];
-												ICU-1568 use static mode for uconv by default

X-SVN-Rev: 7848
											
										
										
											2002-03-01 22:44:21 +00:00
+								#endif
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								#define DEFAULT_BUFSZ   4096
-												ICU-1568 static mode update

X-SVN-Rev: 8040
											
										
										
											2002-03-15 02:02:57 +00:00
+								#define UCONVMSG "uconvmsg"
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								static UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								/*
 								 * Initialize the message bundle so that message strings can be fetched
 								 * by u_wmsg().
 								 *
 								 */
-												ICU-1220 call initMsg() before printing error msg.

X-SVN-Rev: 6724
											
										
										
											2001-11-10 01:53:45 +00:00
 								static void initMsg(const char *pname) {
 								    static int ps = 0;
 								    if (!ps) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
 								        UErrorCode err = U_ZERO_ERROR;
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        ps = 1;
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
-												ICU-1568 use static mode for uconv by default

X-SVN-Rev: 7848
											
										
										
											2002-03-01 22:44:21 +00:00
+								        /* Set up our static data - if any */
-												ICU-1868 AS/400 changes (allow uconv to be built as static, dll, or common)

X-SVN-Rev: 8808
											
										
										
											2002-06-07 17:08:34 +00:00
+								#ifdef UCONVMSG_LINK
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								        udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
-												ICU-1568 use static mode for uconv by default

X-SVN-Rev: 7848
											
										
										
											2002-03-01 22:44:21 +00:00
+								        if (U_FAILURE(err)) {
 								          fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
 								                  pname, u_errorName(err));
 								          err = U_ZERO_ERROR; /* It may still fail */
 								        }
 								#endif
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        /* Get messages. */
-												ICU-1568 static mode update

X-SVN-Rev: 8040
											
										
										
											2002-03-15 02:02:57 +00:00
+								        gBundle = u_wmsg_setPath(UCONVMSG, &err);
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        if (U_FAILURE(err)) {
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								            fprintf(stderr,
 								                    "%s: warning: couldn't open bundle %s: %s\n",
 								                    pname, UCONVMSG, u_errorName(err));
-												ICU-1992 data packaging, update to uconv

X-SVN-Rev: 9454
											
										
										
											2002-07-31 00:51:06 +00:00
+								#ifdef UCONVMSG_LINK
 								            fprintf(stderr,
 								                    "%s: setAppData was called, internal data %s failed to load\n",
-												ICU-1992 data packaging

X-SVN-Rev: 9475
											
										
										
											2002-07-31 21:06:04 +00:00
+								                        pname, UCONVMSG);
-												ICU-1992 data packaging, update to uconv

X-SVN-Rev: 9454
											
										
										
											2002-07-31 00:51:06 +00:00
+								#endif
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
-												ICU-1568 static mode update

X-SVN-Rev: 8040
											
										
										
											2002-03-15 02:02:57 +00:00
+								            err = U_ZERO_ERROR;
 								            /* that was try #1, try again with a path */
-												ICU-1220 private string functions

X-SVN-Rev: 8204
											
										
										
											2002-03-22 09:51:02 +00:00
+								            uprv_strcpy(dataPath, u_getDataDirectory());
-												ICU-1992 data packaging, update to uconv

X-SVN-Rev: 9454
											
										
										
											2002-07-31 00:51:06 +00:00
+								            uprv_strcat(dataPath, U_FILE_SEP_STRING);
-												ICU-1220 private string functions

X-SVN-Rev: 8204
											
										
										
											2002-03-22 09:51:02 +00:00
+								            uprv_strcat(dataPath, UCONVMSG);
-												ICU-1568 static mode update

X-SVN-Rev: 8040
											
										
										
											2002-03-15 02:02:57 +00:00
 								            gBundle = u_wmsg_setPath(dataPath, &err);
 								            if (U_FAILURE(err)) {
 								                fprintf(stderr,
-												ICU-1992 data packaging, update to uconv

X-SVN-Rev: 9454
											
										
										
											2002-07-31 00:51:06 +00:00
+								                    "%s: warning: still couldn't open bundle %s: %s\n",
-												ICU-1568 static mode update

X-SVN-Rev: 8040
											
										
										
											2002-03-15 02:02:57 +00:00
+								                    pname, dataPath, u_errorName(err));
-												ICU-1828 Remove tabs

X-SVN-Rev: 8328
											
										
										
											2002-04-02 01:17:28 +00:00
+								                fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
-												ICU-1568 static mode update

X-SVN-Rev: 8040
											
										
										
											2002-03-15 02:02:57 +00:00
+								            }
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        }
-												ICU-1220 call initMsg() before printing error msg.

X-SVN-Rev: 6724
											
										
										
											2001-11-10 01:53:45 +00:00
+								    }
 								}
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								/* Mapping of callback names to the callbacks passed to the converter
 								   API. */
-												ICU-1220 uconv now supports setting callbacks in both directions; see
the manual page, or call uconv -h to have a list (though the manual
page also has explanations for the different callbacks).

X-SVN-Rev: 7403
											
										
										
											2002-01-08 02:34:33 +00:00
+								static struct callback_ent {
 								    const char *name;
 								    UConverterFromUCallback fromu;
 								    const void *fromuctxt;
 								    UConverterToUCallback tou;
 								    const void *touctxt;
 								} transcode_callbacks[] = {
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    { "substitute",
 								      UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
 								      UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
 								    { "skip",
 								      UCNV_FROM_U_CALLBACK_SKIP, 0,
 								      UCNV_TO_U_CALLBACK_SKIP, 0 },
 								    { "stop",
 								      UCNV_FROM_U_CALLBACK_STOP, 0,
 								      UCNV_TO_U_CALLBACK_STOP, 0 },
 								    { "escape",
 								      UCNV_FROM_U_CALLBACK_ESCAPE, 0,
 								      UCNV_TO_U_CALLBACK_ESCAPE, 0},
 								    { "escape-icu",
 								      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
 								      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
 								    { "escape-java",
 								      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
 								      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
 								    { "escape-c",
 								      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
 								      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
 								    { "escape-xml",
-												ICU-1220 make escape-xml the same as escape-xml-hex by default instead
of escape-xml-dec.

X-SVN-Rev: 7551
											
										
										
											2002-02-01 07:39:57 +00:00
+								      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
 								      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    { "escape-xml-hex",
 								      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
 								      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
-												ICU-1220 make escape-xml the same as escape-xml-hex by default instead
of escape-xml-dec.

X-SVN-Rev: 7551
											
										
										
											2002-02-01 07:39:57 +00:00
+								    { "escape-xml-dec",
 								      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
 								      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
 								      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
-												ICU-1220 uconv now supports setting callbacks in both directions; see
the manual page, or call uconv -h to have a list (though the manual
page also has explanations for the different callbacks).

X-SVN-Rev: 7403
											
										
										
											2002-01-08 02:34:33 +00:00
+								};
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								/* Return a pointer to a callback record given its name. */
-												ICU-1220 uconv now supports setting callbacks in both directions; see
the manual page, or call uconv -h to have a list (though the manual
page also has explanations for the different callbacks).

X-SVN-Rev: 7403
											
										
										
											2002-01-08 02:34:33 +00:00
+								static const struct callback_ent *findCallback(const char *name) {
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    int i, count =
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
-												ICU-1220 uconv now supports setting callbacks in both directions; see
the manual page, or call uconv -h to have a list (though the manual
page also has explanations for the different callbacks).

X-SVN-Rev: 7403
											
										
										
											2002-01-08 02:34:33 +00:00
 								    /* We'll do a linear search, there aren't many of them and bsearch()
 								       may not be that portable. */
 								    for (i = 0; i < count; ++i) {
-												ICU-1220 case-insensitive compare of callback names.

X-SVN-Rev: 8087
											
										
										
											2002-03-18 17:12:52 +00:00
+								        if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            return &transcode_callbacks[i];
 								        }
-												ICU-1220 uconv now supports setting callbacks in both directions; see
the manual page, or call uconv -h to have a list (though the manual
page also has explanations for the different callbacks).

X-SVN-Rev: 7403
											
										
										
											2002-01-08 02:34:33 +00:00
+								    }
 								    return 0;
 								}
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								/* Print converter information. If lookfor is set, only that converter will
 								   be printed, otherwise all converters will be printed. If canon is non
 								   zero, tags and aliases for each converter are printed too, in the format
 								   expected for convrtrs.txt(5). */
 								static int printConverters(const char *pname, const char *lookfor,
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    UBool canon)
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								{
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								    UErrorCode err = U_ZERO_ERROR;
-												ICU-1104 add a --default-code option.

X-SVN-Rev: 7383
											
										
										
											2002-01-05 00:28:34 +00:00
+								    int32_t num;
 								    uint16_t num_stds;
 								    const char **stds;
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    /* If there is a specified name, just handle that now. */
-												ICU-1104 add a --list-code code option to generalize the --default-code
one. A good use of this option is to know if a given name is valid, by
calling 'uconv --list-code name 2>/dev/null' and checking for the exit
code. Maybe a -q option is in order to make that easier.

X-SVN-Rev: 7386
											
										
										
											2002-01-05 01:08:01 +00:00
+								    if (lookfor) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        if (!canon) {
 								            printf("%s\n", lookfor);
 								            return 0;
 								        } else {
 								        /*  Because we are printing a canonical name, we need the
 								            true converter name. We've done that already except for
 								            the default name (because we want to print the exact
 								            name one would get when calling ucnv_getDefaultName()
 								            in non-canon mode). But since we do not know at this
 								            point if we have the default name or something else, we
 								            need to normalize again to the canonical converter
 								            name. */
 								            const char *truename = ucnv_getAlias(lookfor, 0, &err);
 								            if (U_SUCCESS(err)) {
 								                lookfor = truename;
 								            } else {
 								                err = U_ZERO_ERROR;
 								            }
 								        }
-												ICU-1104 add a --default-code option.

X-SVN-Rev: 7383
											
										
										
											2002-01-05 00:28:34 +00:00
+								    }
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    /* Print converter names. We come here for one of two reasons: we
 								       are printing all the names (lookfor was null), or we have a
 								       single converter to print but in canon mode, hence we need to
 								       get to it in order to print everything. */
-												ICU-1104 add a --default-code option.

X-SVN-Rev: 7383
											
										
										
											2002-01-05 00:28:34 +00:00
+								    num = ucnv_countAvailable();
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    if (num <= 0) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        initMsg(pname);
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								        u_wmsg(stderr, "cantGetNames");
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        return -1;
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    }
 								    if (lookfor) {
 								        num = 1;                /* We know where we want to be. */
 								    }
-												ICU-1104 add a --default-code option.

X-SVN-Rev: 7383
											
										
										
											2002-01-05 00:28:34 +00:00
+								    num_stds = ucnv_countStandards();
 								    stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
-												ICU-1220 implementation of -c option.

X-SVN-Rev: 6821
											
										
										
											2001-11-13 07:22:18 +00:00
+								    if (!stds) {
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								        u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        return -1;
-												ICU-1220 implementation of -c option.

X-SVN-Rev: 6821
											
										
										
											2001-11-13 07:22:18 +00:00
+								    } else {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        uint16_t s;
-												ICU-3230 Fix uconv -l --canon

X-SVN-Rev: 12999
											
										
										
											2003-09-01 06:56:56 +00:00
+								        if (canon) {
 								            printf("{ ");
 								        }
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        for (s = 0; s < num_stds; ++s) {
 								            stds[s] = ucnv_getStandard(s, &err);
-												ICU-3230 Fix uconv -l --canon

X-SVN-Rev: 12999
											
										
										
											2003-09-01 06:56:56 +00:00
+								            if (canon) {
 								                printf("%s ", stds[s]);
 								            }
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            if (U_FAILURE(err)) {
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								                u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                return -1;
 								            }
 								        }
-												ICU-3230 Fix uconv -l --canon

X-SVN-Rev: 12999
											
										
										
											2003-09-01 06:56:56 +00:00
+								        if (canon) {
 								            puts("}");
 								        }
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    }
 								    for (int32_t i = 0; i < num; i++) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        const char *name;
 								        uint16_t num_aliases;
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
 								        /* Set the name either to what we are looking for, or
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        to the current converter name. */
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        if (lookfor) {
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								            name = lookfor;
 								        } else {
 								            name = ucnv_getAvailableName(i);
 								        }
 								        /* Get all the aliases associated to the name. */
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        err = U_ZERO_ERROR;
 								        num_aliases = ucnv_countAliases(name, &err);
 								        if (U_FAILURE(err)) {
 								            printf("%s", name);
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								            UnicodeString str(name, "");
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            putchar('\t');
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                u_wmsg_errorName(err));
 								            return -1;
 								        } else {
 								            uint16_t a, s, t;
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
 								            /* Write all the aliases and their tags. */
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            for (a = 0; a < num_aliases; ++a) {
 								                const char *alias = ucnv_getAlias(name, a, &err);
 								                if (U_FAILURE(err)) {
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								                    UnicodeString str(name, "");
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                    putchar('\t');
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                    u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                        u_wmsg_errorName(err));
 								                    return -1;
 								                }
-												ICU-3230 Fix uconv -l --canon

X-SVN-Rev: 12999
											
										
										
											2003-09-01 06:56:56 +00:00
+								                /* Print the current alias so that it looks right. */
 								                printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
 								                                 alias,
 								                                 (canon ? "" : " "));
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
 								                /* Look (slowly, linear searching) for a tag. */
 								                if (canon) {
-												ICU-3230 Fix uconv -l --canon

X-SVN-Rev: 12999
											
										
										
											2003-09-01 06:56:56 +00:00
+								                    /* -1 to skip the last standard */
 								                    for (s = t = 0; s < num_stds-1; ++s) {
 								                        UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
 								                        if (U_SUCCESS(err)) {
 								                            /* List the standard tags */
 								                            const char *standardName;
 								                            UBool isFirst = TRUE;
 								                            UErrorCode enumError = U_ZERO_ERROR;
 								                            while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
 								                                /* See if this alias is supported by this standard. */
 								                                if (!strcmp(standardName, alias)) {
 								                                    if (!t) {
 								                                        printf(" {");
 								                                        t = 1;
 								                                    }
 								                                    /* Print a * after the default standard name */
 								                                    printf(" %s%s", stds[s], (isFirst ? "*" : ""));
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                                }
-												ICU-3230 Fix uconv -l --canon

X-SVN-Rev: 12999
											
										
										
											2003-09-01 06:56:56 +00:00
+								                                isFirst = FALSE;
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                            }
 								                        }
 								                    }
 								                    if (t) {
 								                        printf(" }");
 								                    }
 								                }
-												ICU-3230 Fix uconv -l --canon

X-SVN-Rev: 12999
											
										
										
											2003-09-01 06:56:56 +00:00
+								                /* Terminate this entry. */
 								                if (canon) {
 								                    puts("");
 								                }
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
 								                /* Move on. */
 								            }
-												ICU-3230 Fix uconv -l --canon

X-SVN-Rev: 12999
											
										
										
											2003-09-01 06:56:56 +00:00
+								            /* Terminate this entry. */
 								            if (!canon) {
 								                puts("");
 								            }
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        }
-												ICU-1220 implementation of -c option.

X-SVN-Rev: 6821
											
										
										
											2001-11-13 07:22:18 +00:00
+								    }
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
 								    /* Free temporary data. */
 								    uprv_free(stds);
 								    /* Success. */
-												ICU-1220 implementation of -c option.

X-SVN-Rev: 6821
											
										
										
											2001-11-13 07:22:18 +00:00
 								    return 0;
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								}
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								/* Print all available transliterators. If canon is non zero, print
 								   one transliterator per line. */
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								static int printTransliterators(UBool canon)
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								{
-												ICU-2248 modularize ICU

X-SVN-Rev: 9934
											
										
										
											2002-10-01 01:26:49 +00:00
+								#if UCONFIG_NO_TRANSLITERATION
 								    printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
 								    return 1;
 								#else
-												ICU-1220 implemented -L, --list-transliterators.
made the -x transliterator option work.
updated manual page and resource bundle to reflect that.

X-SVN-Rev: 7396
											
										
										
											2002-01-07 20:45:00 +00:00
+								    int32_t numtrans = utrans_countAvailableIDs(), i;
-												ICU-1220 if --canon is used with -L, --list-transliterators, print only
one name per line (makes grepping very easy).
Ensure that we can handle transliterators names of arbitrary length (as
big as memory will allow).

X-SVN-Rev: 7398
											
										
										
											2002-01-07 21:05:30 +00:00
+								    int buflen = 512;
 								    char *buf = (char *) uprv_malloc(buflen);
 								    char staticbuf[512];
 								    char sepchar = canon ? '\n' : ' ';
 								    if (!buf) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        buf = staticbuf;
 								        buflen = sizeof(staticbuf);
-												ICU-1220 if --canon is used with -L, --list-transliterators, print only
one name per line (makes grepping very easy).
Ensure that we can handle transliterators names of arbitrary length (as
big as memory will allow).

X-SVN-Rev: 7398
											
										
										
											2002-01-07 21:05:30 +00:00
+								    }
-												ICU-1220 implemented -L, --list-transliterators.
made the -x transliterator option work.
updated manual page and resource bundle to reflect that.

X-SVN-Rev: 7396
											
										
										
											2002-01-07 20:45:00 +00:00
+								    for (i = 0; i < numtrans; ++i) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        int32_t len = utrans_getAvailableID(i, buf, buflen);
 								        if (len >= buflen - 1) {
 								            if (buf != staticbuf) {
 								                buflen <<= 1;
 								                if (buflen < len) {
 								                    buflen = len + 64;
 								                }
 								                buf = (char *) uprv_realloc(buf, buflen);
 								                if (!buf) {
 								                    buf = staticbuf;
 								                    buflen = sizeof(staticbuf);
 								                }
 								            }
 								            utrans_getAvailableID(i, buf, buflen);
 								            if (len >= buflen) {
-												ICU-1220 private string functions

X-SVN-Rev: 8204
											
										
										
											2002-03-22 09:51:02 +00:00
+								                uprv_strcpy(buf + buflen - 4, "..."); /* Truncate the name. */
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            }
 								        }
 								        printf("%s", buf);
 								        if (i < numtrans - 1) {
 								            putchar(sepchar);
 								        }
-												ICU-1220 implemented -L, --list-transliterators.
made the -x transliterator option work.
updated manual page and resource bundle to reflect that.

X-SVN-Rev: 7396
											
										
										
											2002-01-07 20:45:00 +00:00
+								    }
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    /* Add a terminating newline if needed. */
-												ICU-1220 if --canon is used with -L, --list-transliterators, print only
one name per line (makes grepping very easy).
Ensure that we can handle transliterators names of arbitrary length (as
big as memory will allow).

X-SVN-Rev: 7398
											
										
										
											2002-01-07 21:05:30 +00:00
+								    if (sepchar != '\n') {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        putchar('\n');
-												ICU-1220 if --canon is used with -L, --list-transliterators, print only
one name per line (makes grepping very easy).
Ensure that we can handle transliterators names of arbitrary length (as
big as memory will allow).

X-SVN-Rev: 7398
											
										
										
											2002-01-07 21:05:30 +00:00
+								    }
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    /* Free temporary data. */
-												ICU-1220 if --canon is used with -L, --list-transliterators, print only
one name per line (makes grepping very easy).
Ensure that we can handle transliterators names of arbitrary length (as
big as memory will allow).

X-SVN-Rev: 7398
											
										
										
											2002-01-07 21:05:30 +00:00
+								    if (buf != staticbuf) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        uprv_free(buf);
-												ICU-1220 if --canon is used with -L, --list-transliterators, print only
one name per line (makes grepping very easy).
Ensure that we can handle transliterators names of arbitrary length (as
big as memory will allow).

X-SVN-Rev: 7398
											
										
										
											2002-01-07 21:05:30 +00:00
+								    }
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    /* Success. */
-												ICU-1220 implemented -L, --list-transliterators.
made the -x transliterator option work.
updated manual page and resource bundle to reflect that.

X-SVN-Rev: 7396
											
										
										
											2002-01-07 20:45:00 +00:00
+								    return 0;
-												ICU-2248 modularize ICU

X-SVN-Rev: 9934
											
										
										
											2002-10-01 01:26:49 +00:00
+								#endif
-												ICU-1220 implemented -L, --list-transliterators.
made the -x transliterator option work.
updated manual page and resource bundle to reflect that.

X-SVN-Rev: 7396
											
										
										
											2002-01-07 20:45:00 +00:00
+								}
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
+								enum {
 								    uSP = 0x20,         // space
 								    uCR = 0xd,          // carriage return
 								    uLF = 0xa,          // line feed
 								    uNL = 0x85,         // newline
 								    uLS = 0x2028,       // line separator
 								    uPS = 0x2029,       // paragraph separator
 								    uSig = 0xfeff       // signature/BOM character
 								};
 								// Determine where the current chunk (paragraph) ends within s.
 								//
 								// prev: the text already collected for the current chunk
 								// s:    the newly arrived text
 								//
 								// Returns
 								//   n > 0  the number of UChars at the start of s that belong to the
 								//          current chunk, up to and including the first paragraph end
 								//   0      prev ends with a CR and s does not continue it with an LF,
 								//          so the chunk in prev is already complete
 								//   -1     no decision is possible yet (no paragraph end in s, or a CR
 								//          at the very end whose LF may still arrive)
 								static inline int32_t
 								getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
 								    // find one of
 								    // CR, LF, CRLF, NL, LS, PS
 								    // for paragraph ends (see UAX #13/Unicode 4)
 								    // and include it in the chunk
 								    // all of these characters are on the BMP
 								    // do not include FF or VT in case they are part of a paragraph
 								    // (important for bidi contexts)
 								    static const UChar paraEnds[] = {
 								        0xd, 0xa, 0x85, 0x2028, 0x2029
 								    };
 								    enum {
 								        iCR, iLF, iNL, iLS, iPS, iCount
 								    };
 								    // first, see if there is a CRLF split between prev and s
 								    if (prev.endsWith(paraEnds + iCR, 1)) {
 								        if (s.startsWith(paraEnds + iLF, 1)) {
 								            return 1; // split CRLF, include the LF
 								        } else if (!s.isEmpty()) {
 								            return 0; // complete the last chunk
 								        } else {
 								            return -1; // wait for actual further contents to arrive
 								        }
 								    }
 								    const UChar *u = s.getBuffer(), *limit = u + s.length();
 								    UChar c;
 								    while (u < limit) {
 								        c = *u++;
 								        // compare against the exact separators:
 								        // a bit-mask test like (c & uLS) == uLS also matches
 								        // U+202A..U+202F (bidi controls) and many other characters
 								        // that have these bits set, which must not end a paragraph
 								        if (
 								            (c == uCR || c == uLF) ||
 								            (c == uNL) ||
 								            (c == uLS || c == uPS)
 								        ) {
 								            if (c == uCR) {
 								                // check for CRLF
 								                if (u == limit) {
 								                    return -1; // LF may be in the next chunk
 								                } else if (*u == uLF) {
 								                    ++u; // include the LF in this chunk
 								                }
 								            }
 								            return (int32_t)(u - s.getBuffer());
 								        }
 								    }
 								    return -1; // continue collecting the chunk
 								}
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								enum {
 								    // how a converter relates to the U+FEFF Unicode signature character (BOM)
 								    CNV_NO_FEFF,    // cannot convert the U+FEFF Unicode signature character (BOM)
 								    CNV_WITH_FEFF,  // can convert the U+FEFF signature character
 								    CNV_ADDS_FEFF   // automatically adds/detects the U+FEFF signature character
 								};
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
+								static inline UChar
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								nibbleToHex(uint8_t n) {
 								    // only the low four bits of n select the digit
 								    const uint8_t digit = (uint8_t)(n & 0xf);
 								    if (digit > 9) {
 								        // 10..15 map to lowercase 'a'..'f'
 								        return (UChar)(0x61 + (digit - 10));
 								    }
 								    // 0..9 map to '0'..'9'
 								    return (UChar)(0x30 + digit);
 								}
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								// check the converter's Unicode signature properties;
 								// the fromUnicode side of the converter must be in its initial state
 								// and will be reset again if it was used
 								//
 								// returns one of CNV_NO_FEFF, CNV_WITH_FEFF, CNV_ADDS_FEFF
 								static int32_t
 								cnvSigType(UConverter *cnv) {
 								    UErrorCode err;
 								    int32_t result;
 								    // test if the output charset can convert U+FEFF
 								    // uset_open(1, 0): start > end, so the set starts out empty
 								    USet *set = uset_open(1, 0);
 								    err = U_ZERO_ERROR;
 								    ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
+								    if (U_SUCCESS(err) && uset_contains(set, uSig)) {
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								        result = CNV_WITH_FEFF;
 								    } else {
 								        result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
 								    }
 								    uset_close(set);
 								    if (result == CNV_WITH_FEFF) {
 								        // test if the output charset emits a signature anyway
 								        // by converting a single "a" and inspecting the bytes produced
 								        const UChar a[1] = { 0x61 }; // "a"
 								        const UChar *in;
 								        char buffer[20];
 								        char *out;
 								        in = a;
 								        out = buffer;
 								        err = U_ZERO_ERROR;
 								        ucnv_fromUnicode(cnv,
 								            &out, buffer + sizeof(buffer),
 								            &in, a + 1,
 								            NULL, TRUE, &err);
 								        // restore the initial fromUnicode state for the caller
 								        ucnv_resetFromUnicode(cnv);
 								        // a detected signature at the start of the output means that
 								        // the converter wrote one by itself
 								        if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
 								            U_SUCCESS(err)
 								        ) {
 								            result = CNV_ADDS_FEFF;
 								        }
 								    }
 								    return result;
 								}
 								// Owns the buffers that are allocated by setBufferSize()
 								// and declares the convertFile() operation.
 								class ConvertFile {
 								public:
 								    // start without any buffers; setBufferSize() allocates them
 								    ConvertFile() :
-												ICU-2983 use separate in/out buffers in case the input is not converted in a single step

X-SVN-Rev: 14217
											
										
										
											2003-12-24 18:08:39 +00:00
+								        buf(NULL), outbuf(NULL), fromoffsets(NULL),
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								        bufsz(0), signature(0) {}
 								    // Allocate one char array of 2 * bufferSize; buf is its first half
 								    // and outbuf its second half. Also allocate the offsets array.
 								    // NOTE(review): arrays from an earlier call are not freed here,
 								    // so calling this more than once on the same object would leak them.
 								    void
 								    setBufferSize(size_t bufferSize) {
 								        bufsz = bufferSize;
-												ICU-2983 use separate in/out buffers in case the input is not converted in a single step

X-SVN-Rev: 14217
											
										
										
											2003-12-24 18:08:39 +00:00
+								        buf = new char[2 * bufsz];
 								        outbuf = buf + bufsz;
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								        // +1 for an added U+FEFF in the intermediate Unicode buffer
 								        fromoffsets = new int32_t[bufsz + 1];
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    }
 								    // outbuf points into the buf allocation and is therefore not deleted separately
 								    ~ConvertFile() {
 								        delete [] buf;
 								        delete [] fromoffsets;
 								    }
 								    // Convert one input (infilestr) from fromcpage to tocpage and write to outfile;
 								    // defined outside of the class body.
 								    UBool convertFile(const char *pname,
 								                      const char *fromcpage,
 								                      UConverterToUCallback toucallback,
 								                      const void *touctxt,
 								                      const char *tocpage,
 								                      UConverterFromUCallback fromucallback,
 								                      const void *fromuctxt,
 								                      UBool fallback,
 								                      const char *translit,
 								                      const char *infilestr,
 								                      FILE * outfile, int verbose);
 								private:
-												ICU-2983 remove "extern" from friend declaration

X-SVN-Rev: 14210
											
										
										
											2003-12-23 00:18:30 +00:00
+								    friend int main(int argc, char **argv);
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
-												ICU-2983 use separate in/out buffers in case the input is not converted in a single step

X-SVN-Rev: 14217
											
										
										
											2003-12-24 18:08:39 +00:00
+								    char *buf, *outbuf;
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								    int32_t *fromoffsets;
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
 								    size_t bufsz; // size of each of buf and outbuf, as passed to setBufferSize()
 								    int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
 								};
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								// Convert a file from one encoding to another
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								UBool
 								ConvertFile::convertFile(const char *pname,
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                         const char *fromcpage,
 								                         UConverterToUCallback toucallback,
 								                         const void *touctxt,
 								                         const char *tocpage,
 								                         UConverterFromUCallback fromucallback,
 								                         const void *fromuctxt,
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                         UBool fallback,
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                         const char *translit,
 								                         const char *infilestr,
 								                         FILE * outfile, int verbose)
 								{
-												ICU-1220 many files can now be transcoded at the same time. I was
tempted to let one do uconv -t utf-8 -f latin1 file1 -f euc-jp file2 so
that many files of various encodings could be converted at the same time
to a single encoding, but will do that later after cleaning up the sloppy
way I enabled multiple files for today.

X-SVN-Rev: 7416
											
										
										
											2002-01-09 01:31:14 +00:00
+								    FILE *infile;
 								    UBool ret = TRUE;
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    UConverter *convfrom = 0;
 								    UConverter *convto = 0;
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								    UErrorCode err = U_ZERO_ERROR;
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    UBool flush;
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								    const char *cbufp, *prevbufp;
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								    char *bufp;
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-1220 switch to error reporting as offsets in the output when
getting weird offsets.
Flush what can be saved when there is an error so that the output is only
truncated where the error occurred.

X-SVN-Rev: 7760
											
										
										
											2002-02-25 06:24:08 +00:00
+								    uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
-												ICU-1220 Added escape-codepoint as a callback to let people use the
new UCNV_ESCAPE_CODEPOINT.
Options compatibility with iconv(1): added -V, --version; -s, --silent;
--verbose (and -v because of ICU commands); -o, --output file. uconv(1) is
now mostly compatible with iconv(1) usage, except for the fact that one
cannot convert many files in one run yet.

X-SVN-Rev: 7414
											
										
										
											2002-01-09 01:04:32 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    const UChar *unibuf, *unibufbp;
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								    UChar *unibufp;
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    size_t rd, wr;
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-2248 modularize ICU

X-SVN-Rev: 9934
											
										
										
											2002-10-01 01:26:49 +00:00
+								#if !UCONFIG_NO_TRANSLITERATION
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								    Transliterator *t = 0;      // Transliterator acting on Unicode data.
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
+								    UnicodeString chunk;        // One chunk of the text being collected for transformation.
-												ICU-2248 modularize ICU

X-SVN-Rev: 9934
											
										
										
											2002-10-01 01:26:49 +00:00
+								#endif
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								    UnicodeString u;            // String to do the transliteration.
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    int32_t ulen;
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
+								    // use conversion offsets for error messages
 								    // unless a transliterator is used -
 								    // a text transformation will reorder characters in unpredictable ways
 								    UBool useOffsets = TRUE;
-												ICU-1220 many files can now be transcoded at the same time. I was
tempted to let one do uconv -t utf-8 -f latin1 file1 -f euc-jp file2 so
that many files of various encodings could be converted at the same time
to a single encoding, but will do that later after cleaning up the sloppy
way I enabled multiple files for today.

X-SVN-Rev: 7416
											
										
										
											2002-01-09 01:31:14 +00:00
+								    // Open the correct input file or connect to stdin for reading input
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    if (infilestr != 0 && strcmp(infilestr, "-")) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        infile = fopen(infilestr, "rb");
 								        if (infile == 0) {
 								            UnicodeString str1(infilestr, "");
-												ICU-1828 Remove tabs

X-SVN-Rev: 8328
											
										
										
											2002-04-02 01:17:28 +00:00
+								            str1.append((UChar32) 0);
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            UnicodeString str2(strerror(errno), "");
-												ICU-1828 Remove tabs

X-SVN-Rev: 8328
											
										
										
											2002-04-02 01:17:28 +00:00
+								            str2.append((UChar32) 0);
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            initMsg(pname);
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								            u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            return FALSE;
 								        }
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    } else {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        infilestr = "-";
 								        infile = stdin;
-												ICU-3389 Fix the case when newline translation is or is not used.

X-SVN-Rev: 14734
											
										
										
											2004-03-23 06:46:35 +00:00
+								#if defined(WIN32) || defined(U_CYGWIN)
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        if (setmode(fileno(stdin), O_BINARY) == -1) {
-												ICU-1828 Remove tabs

X-SVN-Rev: 8328
											
										
										
											2002-04-02 01:17:28 +00:00
+								            initMsg(pname);
 								            u_wmsg(stderr, "cantSetInBinMode");
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            return FALSE;
 								        }
-												ICU-1220 many files can now be transcoded at the same time. I was
tempted to let one do uconv -t utf-8 -f latin1 file1 -f euc-jp file2 so
that many files of various encodings could be converted at the same time
to a single encoding, but will do that later after cleaning up the sloppy
way I enabled multiple files for today.

X-SVN-Rev: 7416
											
										
										
											2002-01-09 01:31:14 +00:00
+								#endif
 								    }
 								    if (verbose) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        fprintf(stderr, "%s:\n", infilestr);
-												ICU-1220 many files can now be transcoded at the same time. I was
tempted to let one do uconv -t utf-8 -f latin1 file1 -f euc-jp file2 so
that many files of various encodings could be converted at the same time
to a single encoding, but will do that later after cleaning up the sloppy
way I enabled multiple files for today.

X-SVN-Rev: 7416
											
										
										
											2002-01-09 01:31:14 +00:00
+								    }
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
-												ICU-2248 modularize ICU

X-SVN-Rev: 9934
											
										
										
											2002-10-01 01:26:49 +00:00
+								#if !UCONFIG_NO_TRANSLITERATION
-												ICU-1220 many files can now be transcoded at the same time. I was
tempted to let one do uconv -t utf-8 -f latin1 file1 -f euc-jp file2 so
that many files of various encodings could be converted at the same time
to a single encoding, but will do that later after cleaning up the sloppy
way I enabled multiple files for today.

X-SVN-Rev: 7416
											
										
										
											2002-01-09 01:31:14 +00:00
+								    // Create transliterator as needed.
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    if (translit != NULL && *translit) {
-												ICU-1220 support IDs and compound IDs, but also complex rules in the -x
argument.

X-SVN-Rev: 7688
											
										
										
											2002-02-16 07:43:26 +00:00
+								        UParseError parse;
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        UnicodeString str(translit), pestr;
-												ICU-1220 support IDs and compound IDs, but also complex rules in the -x
argument.

X-SVN-Rev: 7688
											
										
										
											2002-02-16 07:43:26 +00:00
 								        /* Create from rules or by ID as needed. */
-												ICU-1220 error reporting for rules parsing errors.

X-SVN-Rev: 7690
											
										
										
											2002-02-16 08:47:31 +00:00
+								        parse.line = -1;
-												ICU-1220 support IDs and compound IDs, but also complex rules in the -x
argument.

X-SVN-Rev: 7688
											
										
										
											2002-02-16 07:43:26 +00:00
+								        if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
-												ICU-1220 support IDs and compound IDs, but also complex rules in the -x
argument.

X-SVN-Rev: 7688
											
										
										
											2002-02-16 07:43:26 +00:00
+								        } else {
 								            t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
 								        }
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        if (U_FAILURE(err)) {
-												ICU-1828 Remove tabs

X-SVN-Rev: 8328
											
										
										
											2002-04-02 01:17:28 +00:00
+								            str.append((UChar32) 0);
 								            initMsg(pname);
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
-												ICU-1220 error reporting for rules parsing errors.

X-SVN-Rev: 7690
											
										
										
											2002-02-16 08:47:31 +00:00
+								            if (parse.line >= 0) {
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								                UChar linebuf[20], offsetbuf[20];
-												ICU-122 add capacity to uprv_itou

X-SVN-Rev: 10908
											
										
										
											2003-01-27 17:24:33 +00:00
+								                uprv_itou(linebuf, 20, parse.line, 10, 0);
 								                uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								                    u_wmsg_errorName(err), linebuf, offsetbuf);
 								            } else {
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								                    u_wmsg_errorName(err));
-												ICU-1828 Remove tabs

X-SVN-Rev: 8328
											
										
										
											2002-04-02 01:17:28 +00:00
+								            }
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            if (t) {
 								                delete t;
 								                t = 0;
 								            }
 								            goto error_exit;
 								        }
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
 								        useOffsets = FALSE;
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    }
-												ICU-2248 modularize ICU

X-SVN-Rev: 9934
											
										
										
											2002-10-01 01:26:49 +00:00
+								#endif
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								    // Create codepage converter. If the codepage or its aliases weren't
-												ICU-1220 Added escape-codepoint as a callback to let people use the
new UCNV_ESCAPE_CODEPOINT.
Options compatibility with iconv(1): added -V, --version; -s, --silent;
--verbose (and -v because of ICU commands); -o, --output file. uconv(1) is
now mostly compatible with iconv(1) usage, except for the fact that one
cannot convert many files in one run yet.

X-SVN-Rev: 7414
											
										
										
											2002-01-09 01:04:32 +00:00
+								    // available, it returns NULL and a failure code. We also set the
 								    // callbacks, and return errors in the same way.
-												ICU-1200 do not use the soon to be deprecated UnicodeConverter.

X-SVN-Rev: 7355
											
										
										
											2001-12-20 00:50:00 +00:00
+								    convfrom = ucnv_open(fromcpage, &err);
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    if (U_FAILURE(err)) {
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								        UnicodeString str(fromcpage, "");
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        initMsg(pname);
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								        u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            u_wmsg_errorName(err));
 								        goto error_exit;
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								    }
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								    ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    if (U_FAILURE(err)) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        initMsg(pname);
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        goto error_exit;
-												ICU-1220 added iconv-compatibility option -c, and added -i for input

X-SVN-Rev: 7399
											
										
										
											2002-01-07 21:34:41 +00:00
+								    }
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-1200 do not use the soon to be deprecated UnicodeConverter.

X-SVN-Rev: 7355
											
										
										
											2001-12-20 00:50:00 +00:00
+								    convto = ucnv_open(tocpage, &err);
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    if (U_FAILURE(err)) {
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								        UnicodeString str(tocpage, "");
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        initMsg(pname);
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								        u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            u_wmsg_errorName(err));
 								        goto error_exit;
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								    }
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								    ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    if (U_FAILURE(err)) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        initMsg(pname);
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        goto error_exit;
-												ICU-1220 added iconv-compatibility option -c, and added -i for input

X-SVN-Rev: 7399
											
										
										
											2002-01-07 21:34:41 +00:00
+								    }
-												ICU-1220 Report position at which errors occur.
Fix memory dumps in messages by explicitly adding U+0000 at the end of the
strings that we getBuffer() to u_wmsg().
Add --fallback and --no-fallback options to control use of fallback.

X-SVN-Rev: 7421
											
										
										
											2002-01-09 20:42:19 +00:00
+								    ucnv_setFallback(convto, fallback);
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
 								    int8_t sig;
-												ICU-1220 Added escape-codepoint as a callback to let people use the
new UCNV_ESCAPE_CODEPOINT.
Options compatibility with iconv(1): added -V, --version; -s, --silent;
--verbose (and -v because of ICU commands); -o, --output file. uconv(1) is
now mostly compatible with iconv(1) usage, except for the fact that one
cannot convert many files in one run yet.

X-SVN-Rev: 7414
											
										
										
											2002-01-09 01:04:32 +00:00
 								    // OK, we can convert now.
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    sig = signature;
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								    rd = 0;
-												ICU-1220 Added escape-codepoint as a callback to let people use the
new UCNV_ESCAPE_CODEPOINT.
Options compatibility with iconv(1): added -V, --version; -s, --silent;
--verbose (and -v because of ICU commands); -o, --output file. uconv(1) is
now mostly compatible with iconv(1) usage, except for the fact that one
cannot convert many files in one run yet.

X-SVN-Rev: 7414
											
										
										
											2002-01-09 01:04:32 +00:00
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    do {
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								        willexit = FALSE;
-												ICU-1220 switch to error reporting as offsets in the output when
getting weird offsets.
Flush what can be saved when there is an error so that the output is only
truncated where the error occurred.

X-SVN-Rev: 7760
											
										
										
											2002-02-25 06:24:08 +00:00
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								        // input file offset at the beginning of the next buffer
 								        infoffset += rd;
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        rd = fread(buf, 1, bufsz, infile);
 								        if (ferror(infile) != 0) {
 								            UnicodeString str(strerror(errno));
 								            initMsg(pname);
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            goto error_exit;
 								        }
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								        // Convert the read buffer into the new encoding via Unicode.
 								        // After the call 'unibufp' will be placed behind the last
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        // character that was converted in the 'unibuf'.
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								        // Also the 'cbufp' is positioned behind the last converted
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								        // character.
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        // At the last conversion in the file, flush should be set to
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								        // true so that we get all characters converted.
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        //
 								        // The converter must be flushed at the end of conversion so
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								        // that characters on hold also will be written.
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        cbufp = buf;
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								        flush = (UBool)(rd != bufsz);
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								        // convert until the input is consumed
 								        do {
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								            // remember the start of the current byte-to-Unicode conversion
 								            prevbufp = cbufp;
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            unibuf = unibufp = u.getBuffer((int32_t)bufsz);
 								            // Use bufsz instead of u.getCapacity() for the targetLimit
 								            // so that we don't overflow fromoffsets[].
 								            ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
+								                buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
 								            ulen = (int32_t)(unibufp - unibuf);
 								            u.releaseBuffer(ulen);
 								            // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
 								            // converting all of the input bytes.
 								            // It works like this because ucnv_toUnicode() returns only under the
 								            // following conditions:
 								            // - an error occurred during conversion (an error code is set)
 								            // - the target buffer is filled (the error code indicates an overflow)
 								            // - the source is consumed
 								            // That is, if the error code does not indicate a failure,
 								            // not even an overflow, then the source must be consumed entirely.
 								            fromSawEndOfBytes = (UBool)U_SUCCESS(err);
 								            if (err == U_BUFFER_OVERFLOW_ERROR) {
 								                err = U_ZERO_ERROR;
 								            } else if (U_FAILURE(err)) {
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								                char pos[32], errorBytes[32];
 								                int8_t i, length, errorLength;
 								                UErrorCode localError = U_ZERO_ERROR;
 								                errorLength = (int8_t)sizeof(errorBytes);
 								                ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
 								                if (U_FAILURE(localError) || errorLength == 0) {
 								                    errorLength = 1;
 								                }
 								                // print the input file offset of the start of the error bytes:
 								                // input file offset of the current byte buffer +
 								                // length of the just consumed bytes -
 								                // length of the error bytes
 								                length =
 								                    (int8_t)sprintf(pos, "%d",
 								                        (int)(infoffset + (cbufp - buf) - errorLength));
 								                // output the bytes that caused the error
 								                UnicodeString str;
 								                for (i = 0; i < errorLength; ++i) {
 								                    if (i > 0) {
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
+								                        str.append((UChar)uSP);
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								                    }
 								                    str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
 								                    str.append(nibbleToHex((uint8_t)errorBytes[i]));
 								                }
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                initMsg(pname);
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								                u_wmsg(stderr, "problemCvtToU",
 								                        UnicodeString(pos, length, "").getTerminatedBuffer(),
 								                        str.getTerminatedBuffer(),
 								                        u_wmsg_errorName(err));
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                willexit = TRUE;
 								                err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
 								            }
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            // Replaced a check for whether the input was consumed by
 								            // looping until it is; message key "premEndInput" now obsolete.
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            if (ulen == 0) {
 								                continue;
 								            }
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            // remove a U+FEFF Unicode signature character if requested
 								            if (sig < 0) {
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
+								                if (u.charAt(0) == uSig) {
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                    u.remove(0, 1);
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
 								                    // account for the removed UChar and offset
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                    --ulen;
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
+								                    if (useOffsets) {
 								                        // remove an offset from fromoffsets[] as well
 								                        // to keep the array parallel with the UChars
 								                        memmove(fromoffsets, fromoffsets + 1, ulen * 4);
 								                    }
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                }
 								                sig = 0;
 								            }
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
-												ICU-2248 modularize ICU

X-SVN-Rev: 9934
											
										
										
											2002-10-01 01:26:49 +00:00
+								#if !UCONFIG_NO_TRANSLITERATION
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            // Transliterate/transform if needed.
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
+								            // For transformation, we use chunking code -
 								            // collect Unicode input until, for example, an end-of-line,
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            // then transform and output-convert that and continue collecting.
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
+								            // This makes the transformation result independent of the buffer size
 								            // while avoiding the slower keyboard mode.
 								            // The end-of-chunk characters are completely included in the
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            // transformed string in case they are to be transformed themselves.
 								            if (t != NULL) {
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
+								                UnicodeString out;
 								                int32_t chunkLimit;
 								                do {
 								                    chunkLimit = getChunkLimit(chunk, u);
 								                    if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
 								                        // use all of the rest at the end of the text
 								                        chunkLimit = u.length();
 								                    }
 								                    if (chunkLimit >= 0) {
 								                        // complete the chunk and transform it
 								                        chunk.append(u, 0, chunkLimit);
 								                        u.remove(0, chunkLimit);
 								                        t->transliterate(chunk);
 								                        // append the transformation result to the result and empty the chunk
 								                        out.append(chunk);
 								                        chunk.remove();
 								                    } else {
 								                        // continue collecting the chunk
 								                        chunk.append(u);
 								                        break;
 								                    }
 								                } while (!u.isEmpty());
 								                u = out;
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                ulen = u.length();
 								            }
-												ICU-2248 modularize ICU

X-SVN-Rev: 9934
											
										
										
											2002-10-01 01:26:49 +00:00
+								#endif
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            // add a U+FEFF Unicode signature character if requested
 								            // and possible/necessary
 								            if (sig > 0) {
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
+								                if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
 								                    u.insert(0, (UChar)uSig);
 								                    if (useOffsets) {
 								                        // insert a pseudo-offset into fromoffsets[] as well
 								                        // to keep the array parallel with the UChars
 								                        memmove(fromoffsets + 1, fromoffsets, ulen * 4);
 								                        fromoffsets[0] = -1;
 								                    }
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
 								                    // account for the additional UChar and offset
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                    ++ulen;
-												ICU-1220 switch to error reporting as offsets in the output when
getting weird offsets.
Flush what can be saved when there is an error so that the output is only
truncated where the error occurred.

X-SVN-Rev: 7760
											
										
										
											2002-02-25 06:24:08 +00:00
+								                }
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                sig = 0;
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								            }
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            // Convert the Unicode buffer into the destination codepage
 								            // Again 'bufp' will be placed behind the last converted character
 								            // And 'unibufp' will be placed behind the last converted unicode character
 								            // At the last conversion flush should be set to true to ensure that
 								            // all characters left get converted
 								            unibuf = unibufbp = u.getBuffer();
 								            do {
-												ICU-2983 use separate in/out buffers in case the input is not converted in a single step

X-SVN-Rev: 14217
											
										
										
											2003-12-24 18:08:39 +00:00
+								                bufp = outbuf;
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
 								                // Use fromSawEndOfBytes in addition to the flush flag -
 								                // it indicates whether the intermediate Unicode string
 								                // contains the very last UChars for the very last input bytes.
-												ICU-2983 use separate in/out buffers in case the input is not converted in a single step

X-SVN-Rev: 14217
											
										
										
											2003-12-24 18:08:39 +00:00
+								                ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                                 &unibufbp,
 								                                 unibuf + ulen,
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								                                 NULL, (UBool)(flush && fromSawEndOfBytes), &err);
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
 								                // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
 								                // converting all of the intermediate UChars.
 								                // See comment for fromSawEndOfBytes.
 								                toSawEndOfUnicode = (UBool)U_SUCCESS(err);
 								                if (err == U_BUFFER_OVERFLOW_ERROR) {
 								                    err = U_ZERO_ERROR;
 								                } else if (U_FAILURE(err)) {
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								                    UChar errorUChars[4];
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                    const char *errtag;
 								                    char pos[32];
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								                    UChar32 c;
 								                    int8_t i, length, errorLength;
 								                    UErrorCode localError = U_ZERO_ERROR;
 								                    errorLength = (int8_t)LENGTHOF(errorUChars);
 								                    ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
 								                    if (U_FAILURE(localError) || errorLength == 0) {
 								                        // need at least 1 so that we don't access beyond the length of fromoffsets[]
 								                        errorLength = 1;
 								                    }
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								                    int32_t ferroffset;
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
+								                    if (useOffsets) {
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								                        // Unicode buffer offset of the start of the error UChars
 								                        ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
 								                        if (ferroffset < 0) {
 								                            // approximation - the character started in the previous Unicode buffer
 								                            ferroffset = 0;
 								                        }
 								                        // get the corresponding byte offset out of fromoffsets[]
 								                        // go back if the offset is not known for some of the UChars
 								                        int32_t fromoffset;
 								                        do {
 								                            fromoffset = fromoffsets[ferroffset];
 								                        } while (fromoffset < 0 && --ferroffset >= 0);
 								                        // total input file offset =
 								                        // input file offset of the current byte buffer +
 								                        // byte buffer offset of where the current Unicode buffer is converted from +
 								                        // fromoffsets[Unicode offset]
 								                        ferroffset = infoffset + (prevbufp - buf) + fromoffset;
 								                        errtag = "problemCvtFromU";
 								                    } else {
 								                        // Do not use fromoffsets if (t != NULL) because the Unicode text may
 								                        // be different from what the offsets refer to.
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								                        // output file offset
-												ICU-2983 use separate in/out buffers in case the input is not converted in a single step

X-SVN-Rev: 14217
											
										
										
											2003-12-24 18:08:39 +00:00
+								                        ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                        errtag = "problemCvtFromUOut";
 								                    }
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
 								                    length = (int8_t)sprintf(pos, "%u", ferroffset);
 								                    // output the code points that caused the error
 								                    UnicodeString str;
 								                    for (i = 0; i < errorLength;) {
 								                        if (i > 0) {
-												ICU-2983 improve transforms with paragraph chunking

X-SVN-Rev: 14208
											
										
										
											2003-12-22 19:11:42 +00:00
+								                            str.append((UChar)uSP);
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								                        }
 								                        U16_NEXT(errorUChars, i, errorLength, c);
 								                        if (c >= 0x100000) {
 								                            str.append(nibbleToHex((uint8_t)(c >> 20)));
 								                        }
 								                        if (c >= 0x10000) {
 								                            str.append(nibbleToHex((uint8_t)(c >> 16)));
 								                        }
 								                        str.append(nibbleToHex((uint8_t)(c >> 12)));
 								                        str.append(nibbleToHex((uint8_t)(c >> 8)));
 								                        str.append(nibbleToHex((uint8_t)(c >> 4)));
 								                        str.append(nibbleToHex((uint8_t)c));
 								                    }
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                    initMsg(pname);
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								                    u_wmsg(stderr, errtag,
 								                            UnicodeString(pos, length, "").getTerminatedBuffer(),
 								                            str.getTerminatedBuffer(),
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                           u_wmsg_errorName(err));
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								                    u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                    willexit = TRUE;
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								                    err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                }
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                // Replaced a check for whether the intermediate Unicode characters were all consumed by
 								                // looping until they are; message key "premEnd" now obsolete.
-												ICU-1220 switch to error reporting as offsets in the output when
getting weird offsets.
Flush what can be saved when there is an error so that the output is only
truncated where the error occurred.

X-SVN-Rev: 7760
											
										
										
											2002-02-25 06:24:08 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                // Finally, write the converted buffer to the output file
-												ICU-2983 use separate in/out buffers in case the input is not converted in a single step

X-SVN-Rev: 14217
											
										
										
											2003-12-24 18:08:39 +00:00
+								                size_t outlen = (size_t) (bufp - outbuf);
 								                outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                if (wr != outlen) {
-												ICU-2983 fix offset handling and error output

X-SVN-Rev: 14206
											
										
										
											2003-12-21 21:49:37 +00:00
+								                    UnicodeString str(strerror(errno));
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                    initMsg(pname);
 								                    u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
 								                    willexit = TRUE;
 								                }
-												ICU-1220 switch to error reporting as offsets in the output when
getting weird offsets.
Flush what can be saved when there is an error so that the output is only
truncated where the error occurred.

X-SVN-Rev: 7760
											
										
										
											2002-02-25 06:24:08 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                if (willexit) {
 								                    goto error_exit;
 								                }
 								            } while (!toSawEndOfUnicode);
 								        } while (!fromSawEndOfBytes);
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								    } while (!flush);           // Stop when we have flushed the
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								                                // converters (this means that it's
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                                // the end of output)
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
 								    goto normal_exit;
-												ICU-1220 Report position at which errors occur.
Fix memory dumps in messages by explicitly adding U+0000 at the end of the
strings that we getBuffer() to u_wmsg().
Add --fallback and --no-fallback options to control use of fallback.

X-SVN-Rev: 7421
											
										
										
											2002-01-09 20:42:19 +00:00
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								error_exit:
-												ICU-1220 many files can now be transcoded at the same time. I was
tempted to let one do uconv -t utf-8 -f latin1 file1 -f euc-jp file2 so
that many files of various encodings could be converted at the same time
to a single encoding, but will do that later after cleaning up the sloppy
way I enabled multiple files for today.

X-SVN-Rev: 7416
											
										
										
											2002-01-09 01:31:14 +00:00
+								    ret = FALSE;
-												ICU-1220 Report position at which errors occur.
Fix memory dumps in messages by explicitly adding U+0000 at the end of the
strings that we getBuffer() to u_wmsg().
Add --fallback and --no-fallback options to control use of fallback.

X-SVN-Rev: 7421
											
										
										
											2002-01-09 20:42:19 +00:00
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								normal_exit:
 								    // Cleanup.
-												ICU-1220 Report position at which errors occur.
Fix memory dumps in messages by explicitly adding U+0000 at the end of the
strings that we getBuffer() to u_wmsg().
Add --fallback and --no-fallback options to control use of fallback.

X-SVN-Rev: 7421
											
										
										
											2002-01-09 20:42:19 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    ucnv_close(convfrom);
 								    ucnv_close(convto);
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-2248 modularize ICU

X-SVN-Rev: 9934
											
										
										
											2002-10-01 01:26:49 +00:00
+								#if !UCONFIG_NO_TRANSLITERATION
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    delete t;
-												ICU-2248 modularize ICU

X-SVN-Rev: 9934
											
										
										
											2002-10-01 01:26:49 +00:00
+								#endif
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-1220 many files can now be transcoded at the same time. I was
tempted to let one do uconv -t utf-8 -f latin1 file1 -f euc-jp file2 so
that many files of various encodings could be converted at the same time
to a single encoding, but will do that later after cleaning up the sloppy
way I enabled multiple files for today.

X-SVN-Rev: 7416
											
										
										
											2002-01-09 01:31:14 +00:00
+								    if (infile != stdin) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        fclose(infile);
-												ICU-1220 many files can now be transcoded at the same time. I was
tempted to let one do uconv -t utf-8 -f latin1 file1 -f euc-jp file2 so
that many files of various encodings could be converted at the same time
to a single encoding, but will do that later after cleaning up the sloppy
way I enabled multiple files for today.

X-SVN-Rev: 7416
											
										
										
											2002-01-09 01:31:14 +00:00
+								    }
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								    return ret;
 								}
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								static void usage(const char *pname, int ecode) {
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    const UChar *msg;
 								    int32_t msgLen;
 								    UErrorCode err = U_ZERO_ERROR;
-												ICU-1828 Remove tabs

X-SVN-Rev: 8328
											
										
										
											2002-04-02 01:17:28 +00:00
+								    FILE *fp = ecode ? stderr : stdout;
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								    int res;
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
 								    initMsg(pname);
 								    msg =
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
 								                            &msgLen, &err);
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 8681
											
										
										
											2002-05-23 22:10:23 +00:00
+								    UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    UnicodeString mname(msg, msgLen + 1);
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								    res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    if (!ecode) {
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								        if (!res) {
-												ICU-1828 Remove tabs

X-SVN-Rev: 8328
											
										
										
											2002-04-02 01:17:28 +00:00
+								            fputc('\n', fp);
 								        }
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								        if (!u_wmsg(fp, "help")) {
 								            /* Now dump callbacks and finish. */
 								            int i, count =
 								                sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
 								            for (i = 0; i < count; ++i) {
 								                fprintf(fp, " %s", transcode_callbacks[i].name);
-												ICU-1828 Remove tabs

X-SVN-Rev: 8328
											
										
										
											2002-04-02 01:17:28 +00:00
+								            }
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								            fputc('\n', fp);
-												ICU-1828 Remove tabs

X-SVN-Rev: 8328
											
										
										
											2002-04-02 01:17:28 +00:00
+								        }
-												ICU-1220 uconv now supports setting callbacks in both directions; see
the manual page, or call uconv -h to have a list (though the manual
page also has explanations for the different callbacks).

X-SVN-Rev: 7403
											
										
										
											2002-01-08 02:34:33 +00:00
+								    }
-												ICU-1220 Better help format. Removed releaseBuffer() calls.

X-SVN-Rev: 5860
											
										
										
											2001-09-21 00:30:25 +00:00
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    exit(ecode);
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								}
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								extern int
 								main(int argc, char **argv)
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								{
-												ICU-1220 many files can now be transcoded at the same time. I was
tempted to let one do uconv -t utf-8 -f latin1 file1 -f euc-jp file2 so
that many files of various encodings could be converted at the same time
to a single encoding, but will do that later after cleaning up the sloppy
way I enabled multiple files for today.

X-SVN-Rev: 7416
											
										
										
											2002-01-09 01:31:14 +00:00
+								    FILE *outfile;
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    int ret = 0;
-												ICU-1220 implemented -L, --list-transliterators.
made the -x transliterator option work.
updated manual page and resource bundle to reflect that.

X-SVN-Rev: 7396
											
										
										
											2002-01-07 20:45:00 +00:00
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								    size_t bufsz = DEFAULT_BUFSZ;
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    const char *fromcpage = 0;
 								    const char *tocpage = 0;
-												ICU-1220 implemented -L, --list-transliterators.
made the -x transliterator option work.
updated manual page and resource bundle to reflect that.

X-SVN-Rev: 7396
											
										
										
											2002-01-07 20:45:00 +00:00
+								    const char *translit = 0;
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    const char *outfilestr = 0;
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    UBool fallback = FALSE;
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-1220 Added escape-codepoint as a callback to let people use the
new UCNV_ESCAPE_CODEPOINT.
Options compatibility with iconv(1): added -V, --version; -s, --silent;
--verbose (and -v because of ICU commands); -o, --output file. uconv(1) is
now mostly compatible with iconv(1) usage, except for the fact that one
cannot convert many files in one run yet.

X-SVN-Rev: 7414
											
										
										
											2002-01-09 01:04:32 +00:00
+								    UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
-												ICU-1220 uconv now supports setting callbacks in both directions; see
the manual page, or call uconv -h to have a list (though the manual
page also has explanations for the different callbacks).

X-SVN-Rev: 7403
											
										
										
											2002-01-08 02:34:33 +00:00
+								    const void *fromuctxt = 0;
-												ICU-1220 Added escape-codepoint as a callback to let people use the
new UCNV_ESCAPE_CODEPOINT.
Options compatibility with iconv(1): added -V, --version; -s, --silent;
--verbose (and -v because of ICU commands); -o, --output file. uconv(1) is
now mostly compatible with iconv(1) usage, except for the fact that one
cannot convert many files in one run yet.

X-SVN-Rev: 7414
											
										
										
											2002-01-09 01:04:32 +00:00
+								    UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
-												ICU-1220 uconv now supports setting callbacks in both directions; see
the manual page, or call uconv -h to have a list (though the manual
page also has explanations for the different callbacks).

X-SVN-Rev: 7403
											
										
										
											2002-01-08 02:34:33 +00:00
+								    const void *touctxt = 0;
-												ICU-1220 added iconv-compatibility option -c, and added -i for input

X-SVN-Rev: 7399
											
										
										
											2002-01-07 21:34:41 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    char **iter, **remainArgv, **remainArgvLimit;
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    char **end = argv + argc;
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-1220 Added escape-codepoint as a callback to let people use the
new UCNV_ESCAPE_CODEPOINT.
Options compatibility with iconv(1): added -V, --version; -s, --silent;
--verbose (and -v because of ICU commands); -o, --output file. uconv(1) is
now mostly compatible with iconv(1) usage, except for the fact that one
cannot convert many files in one run yet.

X-SVN-Rev: 7414
											
										
										
											2002-01-09 01:04:32 +00:00
+								    const char *pname;
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
-												ICU-1104 add a --list-code code option to generalize the --default-code
one. A good use of this option is to know if a given name is valid, by
calling 'uconv --list-code name 2>/dev/null' and checking for the exit
code. Maybe a -q option is in order to make that easier.

X-SVN-Rev: 7386
											
										
										
											2002-01-05 01:08:01 +00:00
+								    const char *printName = 0;
-												ICU-1220 renamed option -c to be used together with -l.
always print aliases names

X-SVN-Rev: 6746
											
										
										
											2001-11-10 07:01:29 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    UBool verbose = FALSE;
-												ICU-3014 Add explicit u_init() calls to tools and tests in anticipation of static init changes

X-SVN-Rev: 12826
											
										
										
											2003-08-14 21:34:54 +00:00
+								    UErrorCode status = U_ZERO_ERROR;
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    ConvertFile cf;
-												ICU-3014 Add explicit u_init() calls to tools and tests in anticipation of static init changes

X-SVN-Rev: 12826
											
										
										
											2003-08-14 21:34:54 +00:00
+								    /* Initialize ICU */
 								    u_init(&status);
 								    if (U_FAILURE(status)) {
 								        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
 								            argv[0], u_errorName(status));
 								        exit(1);
 								    }
-												ICU-1220 Added escape-codepoint as a callback to let people use the
new UCNV_ESCAPE_CODEPOINT.
Options compatibility with iconv(1): added -V, --version; -s, --silent;
--verbose (and -v because of ICU commands); -o, --output file. uconv(1) is
now mostly compatible with iconv(1) usage, except for the fact that one
cannot convert many files in one run yet.

X-SVN-Rev: 7414
											
										
										
											2002-01-09 01:04:32 +00:00
-												ICU-1220 better pname

X-SVN-Rev: 8203
											
										
										
											2002-03-22 09:50:02 +00:00
+								    // Get and prettify pname.
 								    pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
 								#ifdef WIN32
 								    if (!pname) {
 								        pname = uprv_strrchr(*argv, '/');
 								    }
 								#endif
 								    if (!pname) {
 								        pname = *argv;
 								    } else {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        ++pname;
-												ICU-1220 better pname

X-SVN-Rev: 8203
											
										
										
											2002-03-22 09:50:02 +00:00
+								    }
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								    // First, get the arguments from command-line
 								    // to know the codepages to convert between
-												ICU-1220 many files can now be transcoded at the same time. I was
tempted to let one do uconv -t utf-8 -f latin1 file1 -f euc-jp file2 so
that many files of various encodings could be converted at the same time
to a single encoding, but will do that later after cleaning up the sloppy
way I enabled multiple files for today.

X-SVN-Rev: 7416
											
										
										
											2002-01-09 01:31:14 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    remainArgv = remainArgvLimit = argv + 1;
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    for (iter = argv + 1; iter != end; iter++) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        // Check for from charset
 								        if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
 								            iter++;
 								            if (iter != end)
 								                fromcpage = *iter;
-												ICU-1220 usage() if no arg to -f or -t.

X-SVN-Rev: 7873
											
										
										
											2002-03-03 07:06:27 +00:00
+								            else
 								                usage(pname, 1);
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
 								            iter++;
 								            if (iter != end)
 								                tocpage = *iter;
-												ICU-1220 usage() if no arg to -f or -t.

X-SVN-Rev: 7873
											
										
										
											2002-03-03 07:06:27 +00:00
+								            else
 								                usage(pname, 1);
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        } else if (strcmp("-x", *iter) == 0) {
 								            iter++;
 								            if (iter != end)
 								                translit = *iter;
 								            else
 								                usage(pname, 1);
 								        } else if (!strcmp("--fallback", *iter)) {
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            fallback = TRUE;
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        } else if (!strcmp("--no-fallback", *iter)) {
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            fallback = FALSE;
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
 								            iter++;
 								            if (iter != end) {
 								                bufsz = atoi(*iter);
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								                if ((int) bufsz <= 0) {
 								                    initMsg(pname);
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                    UnicodeString str(*iter);
 								                    initMsg(pname);
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                    u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                    return 3;
 								                }
 								            } else {
 								                usage(pname, 1);
 								            }
 								        } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
 								            if (printTranslits) {
 								                usage(pname, 1);
 								            }
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            printConvs = TRUE;
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        } else if (strcmp("--default-code", *iter) == 0) {
 								            if (printTranslits) {
 								                usage(pname, 1);
 								            }
 								            printName = ucnv_getDefaultName();
 								        } else if (strcmp("--list-code", *iter) == 0) {
 								            if (printTranslits) {
 								                usage(pname, 1);
 								            }
 								            iter++;
 								            if (iter != end) {
 								                UErrorCode e = U_ZERO_ERROR;
 								                printName = ucnv_getAlias(*iter, 0, &e);
 								                if (U_FAILURE(e) || !printName) {
 								                    UnicodeString str(*iter);
 								                    initMsg(pname);
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                    u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                    return 2;
 								                }
 								            } else
 								                usage(pname, 1);
 								        } else if (strcmp("--canon", *iter) == 0) {
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            printCanon = TRUE;
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        } else if (strcmp("-L", *iter) == 0
 								            || !strcmp("--list-transliterators", *iter)) {
 								            if (printConvs) {
 								                usage(pname, 1);
 								            }
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            printTranslits = TRUE;
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
 								            || !strcmp("--help", *iter)) {
 								            usage(pname, 0);
 								        } else if (!strcmp("-c", *iter)) {
 								            fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
 								        } else if (!strcmp("--to-callback", *iter)) {
 								            iter++;
 								            if (iter != end) {
 								                const struct callback_ent *cbe = findCallback(*iter);
 								                if (cbe) {
 								                    fromucallback = cbe->fromu;
 								                    fromuctxt = cbe->fromuctxt;
 								                } else {
 								                    UnicodeString str(*iter);
 								                    initMsg(pname);
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                    u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                    return 4;
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								                }
 								            } else {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                usage(pname, 1);
-												ICU-1680 Fix crash with transliterators that were exceeding the
buffer size.
Also, improve speed a bit.
Finally, add a -b, --block-size size option, which not only is useful
for Real People(tm) wanting a bigger buffer size, but also great for
testing that everything runs smoothly by using -b 1 for example.

X-SVN-Rev: 7548
											
										
										
											2002-01-31 22:44:42 +00:00
+								            }
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        } else if (!strcmp("--from-callback", *iter)) {
 								            iter++;
 								            if (iter != end) {
 								                const struct callback_ent *cbe = findCallback(*iter);
 								                if (cbe) {
 								                    toucallback = cbe->tou;
 								                    touctxt = cbe->touctxt;
 								                } else {
 								                    UnicodeString str(*iter);
 								                    initMsg(pname);
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                    u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                    return 4;
 								                }
 								            } else {
 								                usage(pname, 1);
 								            }
 								        } else if (!strcmp("-i", *iter)) {
 								            toucallback = UCNV_TO_U_CALLBACK_SKIP;
 								        } else if (!strcmp("--callback", *iter)) {
 								            iter++;
 								            if (iter != end) {
 								                const struct callback_ent *cbe = findCallback(*iter);
 								                if (cbe) {
 								                    fromucallback = cbe->fromu;
 								                    fromuctxt = cbe->fromuctxt;
 								                    toucallback = cbe->tou;
 								                    touctxt = cbe->touctxt;
 								                } else {
 								                    UnicodeString str(*iter);
 								                    initMsg(pname);
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								                    u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                    return 4;
 								                }
 								            } else {
 								                usage(pname, 1);
 								            }
 								        } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            verbose = FALSE;
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            verbose = TRUE;
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								            printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            return 0;
 								        } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
 								            ++iter;
 								            if (iter != end && !outfilestr) {
 								                outfilestr = *iter;
 								            } else {
 								                usage(pname, 1);
 								            }
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								        } else if (0 == strcmp("--add-signature", *iter)) {
 								            cf.signature = 1;
 								        } else if (0 == strcmp("--remove-signature", *iter)) {
 								            cf.signature = -1;
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        } else if (**iter == '-' && (*iter)[1]) {
 								            usage(pname, 1);
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								        } else {
 								            // move a non-option up in argv[]
 								            *remainArgvLimit++ = *iter;
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        }
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								    }
-												ICU-1104 add a --list-code code option to generalize the --default-code
one. A good use of this option is to know if a given name is valid, by
calling 'uconv --list-code name 2>/dev/null' and checking for the exit
code. Maybe a -q option is in order to make that easier.

X-SVN-Rev: 7386
											
										
										
											2002-01-05 01:08:01 +00:00
+								    if (printConvs || printName) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        return printConverters(pname, printName, printCanon) ? 2 : 0;
-												ICU-1220 implemented -L, --list-transliterators.
made the -x transliterator option work.
updated manual page and resource bundle to reflect that.

X-SVN-Rev: 7396
											
										
										
											2002-01-07 20:45:00 +00:00
+								    } else if (printTranslits) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        return printTransliterators(printCanon) ? 3 : 0;
-												ICU-1220 renamed option -c to be used together with -l.
always print aliases names

X-SVN-Rev: 6746
											
										
										
											2001-11-10 07:01:29 +00:00
+								    }
-												ICU-1220 allows - for -f and -t (same as not giving them).
more documentation.
changed some error messages.

X-SVN-Rev: 7694
											
										
										
											2002-02-16 18:45:11 +00:00
+								    if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
-												ICU-1220 make the -f and -t arguments optional,
and use the default encoding if they're not specified (I am split
on this between the default encoding and UTF-8). This allows one
to quickly use transliterators without having to pass these options.

X-SVN-Rev: 7693
											
										
										
											2002-02-16 09:15:06 +00:00
+								        fromcpage = ucnv_getDefaultName();
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								    }
-												ICU-1220 allows - for -f and -t (same as not giving them).
more documentation.
changed some error messages.

X-SVN-Rev: 7694
											
										
										
											2002-02-16 18:45:11 +00:00
+								    if (!tocpage || !uprv_strcmp(tocpage, "-")) {
-												ICU-1220 make the -f and -t arguments optional,
and use the default encoding if they're not specified (I am split
on this between the default encoding and UTF-8). This allows one
to quickly use transliterators without having to pass these options.

X-SVN-Rev: 7693
											
										
										
											2002-02-16 09:15:06 +00:00
+								        tocpage = ucnv_getDefaultName();
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								    }
-												ICU-1220 make the -f and -t arguments optional,
and use the default encoding if they're not specified (I am split
on this between the default encoding and UTF-8). This allows one
to quickly use transliterators without having to pass these options.

X-SVN-Rev: 7693
											
										
										
											2002-02-16 09:15:06 +00:00
-												ICU-1220 Added escape-codepoint as a callback to let people use the
new UCNV_ESCAPE_CODEPOINT.
Options compatibility with iconv(1): added -V, --version; -s, --silent;
--verbose (and -v because of ICU commands); -o, --output file. uconv(1) is
now mostly compatible with iconv(1) usage, except for the fact that one
cannot convert many files in one run yet.

X-SVN-Rev: 7414
											
										
										
											2002-01-09 01:04:32 +00:00
+								    // Open the requested output file for writing, or fall back to stdout when no file (or "-") was given
-												ICU-1605 for UCNV_ESCAPE_UNICODE, print the codepoint, not the pair of
code units. Also, delimit the U+XXXX with curly braces for now.

X-SVN-Rev: 7514
											
										
										
											2002-01-28 18:47:35 +00:00
+								    if (outfilestr != 0 && strcmp(outfilestr, "-")) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        outfile = fopen(outfilestr, "wb");
 								        if (outfile == 0) {
 								            UnicodeString str1(outfilestr, "");
 								            UnicodeString str2(strerror(errno), "");
 								            initMsg(pname);
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								            u_wmsg(stderr, "cantCreateOutputF",
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                str1.getBuffer(), str2.getBuffer());
 								            return 1;
 								        }
-												ICU-1220 Added escape-codepoint as a callback to let people use the
new UCNV_ESCAPE_CODEPOINT.
Options compatibility with iconv(1): added -V, --version; -s, --silent;
--verbose (and -v because of ICU commands); -o, --output file. uconv(1) is
now mostly compatible with iconv(1) usage, except for the fact that one
cannot convert many files in one run yet.

X-SVN-Rev: 7414
											
										
										
											2002-01-09 01:04:32 +00:00
+								    } else {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        outfilestr = "-";
 								        outfile = stdout;
-												ICU-3389 Fix the case when newline translation is or is not used.

X-SVN-Rev: 14734
											
										
										
											2004-03-23 06:46:35 +00:00
+								#if defined(WIN32) || defined(U_CYGWIN)
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        if (setmode(fileno(outfile), O_BINARY) == -1) {
-												ICU-1220 change u_wmsg(); consistently use it in uconv.

X-SVN-Rev: 8113
											
										
										
											2002-03-19 17:14:10 +00:00
+								            u_wmsg(stderr, "cantSetOutBinMode");
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            exit(-1);
 								        }
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								#endif
-												ICU-1220 Added escape-codepoint as a callback to let people use the
new UCNV_ESCAPE_CODEPOINT.
Options compatibility with iconv(1): added -V, --version; -s, --silent;
--verbose (and -v because of ICU commands); -o, --output file. uconv(1) is
now mostly compatible with iconv(1) usage, except for the fact that one
cannot convert many files in one run yet.

X-SVN-Rev: 7414
											
										
										
											2002-01-09 01:04:32 +00:00
+								    }
-												ICU-1220 implemented -L, --list-transliterators.
made the -x transliterator option work.
updated manual page and resource bundle to reflect that.

X-SVN-Rev: 7396
											
										
										
											2002-01-07 20:45:00 +00:00
-												ICU-1220 many files can now be transcoded at the same time. I was
tempted to let one do uconv -t utf-8 -f latin1 file1 -f euc-jp file2 so
that many files of various encodings could be converted at the same time
to a single encoding, but will do that later after cleaning up the sloppy
way I enabled multiple files for today.

X-SVN-Rev: 7416
											
										
										
											2002-01-09 01:31:14 +00:00
+								    /* Loop again on the arguments to find all the input files, and
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    convert them. */
-												ICU-1220 many files can now be transcoded at the same time. I was
tempted to let one do uconv -t utf-8 -f latin1 file1 -f euc-jp file2 so
that many files of various encodings could be converted at the same time
to a single encoding, but will do that later after cleaning up the sloppy
way I enabled multiple files for today.

X-SVN-Rev: 7416
											
										
										
											2002-01-09 01:31:14 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    cf.setBufferSize(bufsz);
 								    if(remainArgv < remainArgvLimit) {
 								        for (iter = remainArgv; iter != remainArgvLimit; iter++) {
 								            if (!cf.convertFile(
 								                    pname, fromcpage, toucallback, touctxt, tocpage,
 								                    fromucallback, fromuctxt, fallback, translit, *iter,
 								                    outfile, verbose)
 								            ) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								                goto error_exit;
 								            }
 								        }
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    } else {
 								        if (!cf.convertFile(
 								                pname, fromcpage, toucallback, touctxt, tocpage,
 								                fromucallback, fromuctxt, fallback, translit, 0,
 								                outfile, verbose)
 								        ) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								            goto error_exit;
 								        }
-												ICU-1220 many files can now be transcoded at the same time. I was
tempted to let one do uconv -t utf-8 -f latin1 file1 -f euc-jp file2 so
that many files of various encodings could be converted at the same time
to a single encoding, but will do that later after cleaning up the sloppy
way I enabled multiple files for today.

X-SVN-Rev: 7416
											
										
										
											2002-01-09 01:31:14 +00:00
+								    }
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
 								    goto normal_exit;
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								error_exit:
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								    ret = 1;
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								normal_exit:
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    if (outfile != stdout) {
-												ICU-900 Fixed some compiler warnings.

X-SVN-Rev: 7714
											
										
										
											2002-02-20 00:22:12 +00:00
+								        fclose(outfile);
-												ICU-2983 fix conversion loops in uconv; add/remove signature (BOM)

X-SVN-Rev: 14205
											
										
										
											2003-12-21 01:52:09 +00:00
+								    }
-												ICU-1220 Added escape-codepoint as a callback to let people use the
new UCNV_ESCAPE_CODEPOINT.
Options compatibility with iconv(1): added -V, --version; -s, --silent;
--verbose (and -v because of ICU commands); -o, --output file. uconv(1) is
now mostly compatible with iconv(1) usage, except for the fact that one
cannot convert many files in one run yet.

X-SVN-Rev: 7414
											
										
										
											2002-01-09 01:04:32 +00:00
-												ICU-1220 Added uconv as an extra (Unix only for now).

X-SVN-Rev: 5825
											
										
										
											2001-09-19 23:43:16 +00:00
+								    return ret;
 								}
-												ICU-1220 added a --list-converters option.

X-SVN-Rev: 6743
											
										
										
											2001-11-10 06:52:25 +00:00
 								/*
 								 * Hey, Emacs, please set the following:
 								 *
 								 * Local Variables:
 								 * indent-tabs-mode: nil
 								 * End:
 								 *
 								 */