scuffed-code/icu4c/source/samples/XMLConverter/XMLConverter.cpp

/*
*******************************************************************************
*                                                                             *
* COPYRIGHT:                                                                  *
*   (C) Copyright International Business Machines Corporation, 1998, 1999     *
*   Licensed Material - Program-Property of IBM - All Rights Reserved.        *
*   US Government Users Restricted Rights - Use, duplication, or disclosure   *
*   restricted by GSA ADP Schedule Contract with IBM Corp.                    *
*                                                                             *
*******************************************************************************
*
*/
// XMLConverter.cpp
// To convert one encoded XML file to another

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#ifdef _WIN32
#  include <windows.h>
#endif

#include "utypes.h"
#include "ustring.h"
#include "ucnv.h"
#include "ucnv_err.h"
#include "uloc.h"
#include "uchar.h"

// Size limits used throughout this file.
//
// MAXFILENAMELEN is defined but not referenced in this file.
#define MAXFILENAMELEN  1024
// Capacity of the raw input, ASCII scratch, pivot and output buffers.
#define RAWBUFSIZE       4096
// Value assigned to Encodings_Count in the Encodings enum.
#define ENCODINGCOUNT   5
// Capacity of the buffer that holds the XML declaration (first line).
#define FIRSTLINEBUF    256
// Raw byte type used when sniffing the input encoding.
typedef unsigned char  BYTE;


// firstLine is not referenced anywhere else in this file.
char firstLine[128];
// Encoding name for the input file: convertFirstLine() stores either the
// detected name or the name read from the XML declaration here, and
// convertFile() reads it back to open the input converter.
char encodingNameInFile[256];
// Set to true by the -v command line switch; enables diagnostic output.
bool verbose = false;

// Forward declarations for the functions defined (or expected) in this file.
// Parameter names mirror the definitions so the prototypes document
// themselves.

// Transcodes the file iFN into oFN; encName is the target encoding name and
// outConvrtr the already opened converter for it.
extern void convertFile(char* encName, char* iFN, char* oFN, UConverter* outConvrtr);
// Prints the command line help; exeName is the program name to display.
extern void usage(char* exeName);
// Declared here but not defined in this file.
extern void printChars(unsigned char*, int);
// Guesses the input encoding from the leading bytes of the file.
extern int getInputEncodingType(const BYTE* rawBuffer, 
                                unsigned long byteCount);
// Rewrites the XML declaration for the output encoding and returns the byte
// offset in the input file at which the remaining content starts.
extern long convertFirstLine(FILE* inF, 
                             char* inEncName, 
                             FILE* outF, 
                             char* outEncName, 
                             char* ptrBuf, 
                             unsigned long toRead, 
                             UChar* uBuf);
// Appends a double quote (quote == true) or a single quote to thisString.
extern void catString(char* thisString, bool quote);
// Converts one input chunk and returns the number of bytes written to
// outBuffer.
extern int32_t  XMLUConvert( UConverter* inConverter,
                      UConverter* outConverter,
                      const char* inBuffer, 
                      int32_t* inBufSize, 
                      char* outBuffer, 
                      int32_t outBufCapacity, 
                      bool_t flush,
                      UErrorCode* err);
// Codepage-to-codepage conversion through an intermediate Unicode buffer.
extern void XMLU_fromCodepageToCodepage(    UConverter*    outConverter,
                        UConverter*    inConverter,
                        char**         target,
                        const char*        targetLimit,
                        const char**        source,
                        const char*        sourceLimit,
                        int32_t*        offsets,
                        bool_t            flush,
                        UErrorCode*        err);

// Byte signatures compared against the start of the input file by
// getInputEncodingType().
static const BYTE    gEBCDICPre[]    = { 0x4C, 0x6F, 0xA7, 0x94 };
#if 0
// UCS-4 signatures; compiled out because these encodings are not supported.
static const BYTE    gUCS4BPre[]     = { 0x00, 0x00, 0x00, 0x3C };
static const BYTE    gUCS4LPre[]     = { 0x3C, 0x00, 0x00, 0x00 };
#endif 
// "<?" as two 16-bit units, high byte first (gUTF16BPre) and low byte first
// (gUTF16LPre).
static const BYTE    gUTF16BPre[]    = { 0x00, 0x3C, 0x00, 0x3F };
static const BYTE    gUTF16LPre[]    = { 0x3C, 0x00, 0x3F, 0x00 };
// The five ASCII bytes of "<?xml".
static const char    gXMLDecl_ASCII[]= { 0x3C, 0x3F, 0x78, 0x6D, 0x6C };

// Encoding identifiers returned by getInputEncodingType().
// convertFile() maps the values 0, 3, 4, 5 and 6 to converter names.
enum Encodings
    {
        EBCDIC          = 0,
        UCS_4B          = 1,
        UCS_4L          = 2,
        US_ASCII        = 3,
        UTF_8           = 4,
        UTF_16B         = 5,
        UTF_16L         = 6,

        // NOTE(review): seven identifiers are listed above but the count is
        // ENCODINGCOUNT (5) - presumably the UCS-4 variants, whose detection
        // is compiled out, are not counted; confirm before relying on it.
        Encodings_Count = ENCODINGCOUNT,
        Encodings_Min   = EBCDIC,
        Encodings_Max   = UTF_16L,

        // Returned when none of the known byte signatures matched.
        OtherEncoding   = 999
    };


// Prints the command line help to stdout.
//
// exeName is the program name shown in the synopsis and in the example.
//
void usage(char *  exeName)
{
    // Fixed help text that sits between the synopsis and the closing example;
    // none of these strings contains a conversion specifier.
    static const char* const optionHelp[] =
    {
        "-h     \t= to get help (this information!) \n ",
        "-v     \t= set verbose on; \n \t\t  to get more information about the conversion process \n ",
        "-e     \t= This is a mandatory option and follows with the targetEncName",
        "       \t\t  E.g., output encoding can be like : \n \t\t  ascii, utf8, utf-16be, utf-16le, ebcdic-cp-us \n",
        "trgEncName  \t= The output encoding type needed. \n \t\t  It always should follow the -e switch\n",
        "inputFile     \t= The input XML file name \n",
        "outputFile    \t= The output XML file name \n",
        " \n For example: \n "
    };
    const size_t optionHelpCount = sizeof(optionHelp) / sizeof(optionHelp[0]);
    FILE* out = stdout;

    fprintf(out, "\n USAGE: \n \t%s [-h] [-v] -e trgEncName inputFile outputFile \n\n", exeName);
    fprintf(out, " %s    = Exe name \n ", exeName);

    for (size_t line = 0; line < optionHelpCount; line++)
    {
        fputs(optionHelp[line], out);
    }

    fprintf(out, " \t %s -e utf8 pr-utf-16.xml pr-utf-8.xml \n\n\n ", exeName);
}


// Program entry point.
//
// Expected command line: [-h] [-v] -e trgEncName inputFile outputFile
// where "-e trgEncName inputFile outputFile" must be the last four arguments.
// Opens the converter for the target encoding and hands the work over to
// convertFile(). Returns 0 on success; every failure path exits with status 1.
//
int main(int argc, char** argv)
{
    UErrorCode  err         = U_ZERO_ERROR;
    char*       inFileName  = NULL;
    char*       outFileName = NULL;
    char*       encName     = NULL;
    UConverter* conv        = NULL;

    // Name shown in the help text; argv[0] may be missing when argc is 0.
    char  fallbackName[] = "XMLConverter";
    char* exeName = (argc > 0 && argv[0] != NULL) ? argv[0] : fallbackName;

    // "-e enc in out" needs at least five arguments including the exe name.
    if (argc < 5)
    {
        usage(exeName);
        exit(1);
    }

    for (int i = 0; i < argc; i++)
    {
        if (!strcmp(argv[i], "-h"))
        {
            usage(exeName);
            exit(1);
        }
        if (!strcmp(argv[i], "-v"))
            verbose = true;
        if (!strcmp(argv[i], "-e"))
        {
            // -e must be followed by exactly three more arguments
            if (argc != i + 4)
            {
                usage(exeName);
                exit(1);
            }
            // argv outlives every use below, so the strings are used in
            // place instead of being copied to the heap.
            encName     = argv[i + 1];
            inFileName  = argv[i + 2];
            outFileName = argv[i + 3];
            break;
        }
    }

    // -e is mandatory; without it none of the three names has been set.
    if (encName == NULL)
    {
        usage(exeName);
        exit(1);
    }

    conv = ucnv_open(encName, &err);
    if (U_FAILURE(err))
    {
        if (verbose)
        {
            fprintf(stderr, "Could not create converter to: %s\n", encName);
#if defined(_DEBUG) && defined(XP_CPLUSPLUS)
            fprintf (stderr,"FAILURE! (%s) (%d)\n", errorName(err), err);
#endif
        }
        // nothing was opened, so there is no converter to close here
        exit(1);
    }

    fprintf(stdout, "Converting %s to %s...\n", inFileName, outFileName);
    convertFile(encName, inFileName, outFileName, conv);
    fprintf(stdout, "Finished transcoding file: %s\n", inFileName);

    ucnv_close(conv);
    return 0;
}

void convertFile(char* encName, char* iFN, char* oFN, UConverter* outConvrtr)
{
    //Read the input file
    //
    FILE* inFile = fopen( iFN, "rb");
    if (inFile == NULL) {
        if (verbose)
            fprintf(stderr, "Could not open input file - %s for reading \n", iFN);
        exit(1);
    }

    FILE*   outFile = fopen(oFN, "wb");
	if (outFile == NULL)
	{
        if (verbose)
		    fprintf(stderr, "Could not open output file - %s for writing \n", oFN);
		fclose(inFile);
		return;
	}

    char            rawBuf[RAWBUFSIZE];
    char*           pRawBuf     = NULL;
    unsigned long   bytesRead   = 0;
	UErrorCode       err         = U_ZERO_ERROR;
	
    //get the file size
    //
    unsigned int    curPos      = ftell(inFile);

    if(verbose)
      fprintf(stderr, "curPos = %d\n", curPos);

    if (curPos == 0xFFFFFFFF)
    {
        fprintf(stderr, "fileSize - Could not save current pos \n");
        exit(1);
    }

    // Seek to the end and save that value for return
    //
    if ( fseek(inFile, 0 , SEEK_END) )
    {
        fprintf(stderr, "fileSize - Could not seek to end \n");
        exit(1);
    }

    const unsigned int endPos = ftell(inFile);
    if (endPos == 0xFFFFFFFF)
    {
        fprintf(stderr, "fileSize - Could not get the end pos \n");
        exit(1);
    }

    // And put the pointer back
    //
    if (fseek(inFile, curPos, SEEK_SET))
    {
        fprintf(stderr, "fileSize - Could not seek back to original pos \n");
        exit(1);
    }

    if (curPos >= endPos) 
    {
        fprintf(stderr,"Reached end of input file while reading \n");
        exit(1);
    } 
    
    unsigned int    bytesLeft   = endPos - curPos; 
    if (verbose)
        fprintf(stdout,"Input file size is %d \n", bytesLeft);
    
    unsigned int toRead = (RAWBUFSIZE > bytesLeft) ? bytesLeft : RAWBUFSIZE;
    
    //Read the infile    
    //
    bytesRead = fread( (void*)rawBuf, 1, toRead, inFile);
    if (ferror(inFile)) 
    {
        fprintf(stderr," couldnot read file for input encoding \n");
        exit(1);
    }
    
    if (bytesRead ==  0) 
    {
        fprintf(stderr," couldnot fill raw buffer \n");
        exit(1);
    }
    pRawBuf = rawBuf;

    // get the input encoding type
    int inputEnc = getInputEncodingType((const BYTE*)rawBuf, bytesRead); 
    if (inputEnc == OtherEncoding)
    {
        fprintf(stderr, " Unknown encoded input file. \n Only input encodings supported in the first line are \n");
        fprintf(stderr, " ascii, ebcdic-cp-us, utf8, utf-16be, utf-16le \n");
        exit(1);
    }
    
    //transcoding the first line from inEncodName to ascii and then replacing
    //the encoding=inEncodingName to encoding=outEncodingName
    //
    
    UChar          ucBuf[RAWBUFSIZE];
    char * inEncodName;
    char* tmpPtr = (char*) rawBuf;
    
    //get the input encoding name
    //
    switch (inputEnc)
    {
    case 0 : 
        inEncodName = new char[strlen("ebcdic-cp-us") +1];
        strcpy(inEncodName, "ebcdic-cp-us");
        break;
    case 3 :
        inEncodName = new char[strlen("ascii") +1];
        strcpy(inEncodName, "ascii");
        break;
    case 4 :
        inEncodName = new char[strlen("utf8") +1];
        strcpy(inEncodName, "utf8");
        break;
    case 5 :
        inEncodName = new char[strlen("utf-16be") +1];
        strcpy(inEncodName, "utf-16be");
        break;
    case 6 :
        inEncodName = new char[strlen("utf-16le") +1];
        strcpy(inEncodName, "utf-16le");
        break;
    default :
        break;
    };

    if(verbose)
      {
	fprintf(stderr, "inConverter = %s\n", inEncodName);
      }
    
    UConverter* inConvrtr = ucnv_open(inEncodName, &err);
    //now read and transcode the input to output file
    //Process the firstline separately
    //
    long afterFirstLine = convertFirstLine(inFile, inEncodName, outFile, encName, 
					   pRawBuf, toRead, (UChar*)ucBuf);
    
    //move the pointer after the first line
    //
    if (fseek(inFile, (unsigned long) afterFirstLine, SEEK_SET))
    {
        fprintf(stderr, "fileSize - Could not set the cursor to %d after the first line \n", afterFirstLine);
        exit(1);
    }
    else
      if(verbose)
	fprintf(stderr,"Seeked to %d OK \n", afterFirstLine);
    bytesLeft = endPos - afterFirstLine;
    toRead = (RAWBUFSIZE > bytesLeft) ? bytesLeft : RAWBUFSIZE;
    
    //  read the rest of the input file
    //
    if (verbose)
        fprintf(stdout,"The first line consists of %d bytes \n", afterFirstLine);
    if (encodingNameInFile !=NULL) 
    {
        if (inEncodName)
            delete inEncodName;
        inEncodName = new char[strlen(encodingNameInFile)+1];
        strcpy(inEncodName, encodingNameInFile);
        ucnv_close(inConvrtr);
        inConvrtr = ucnv_open(inEncodName, &err);
    }
    if (verbose)
        fprintf(stdout, "Input Encoding type = %s,  Output Encoding type = %s \n", inEncodName, encName);
    
    char *outBuf = new char[RAWBUFSIZE];
    int  outBufSize = RAWBUFSIZE;
    bool tFlush = false;
    err = U_ZERO_ERROR;
   
    if (verbose)
        fprintf(stdout, "processing the rest of the file \n");
    while( (bytesRead = fread((void *) rawBuf, 1, toRead, inFile)) > 0 || !tFlush)
    {
         int32_t  bytesNeeded = XMLUConvert( inConvrtr,
                      outConvrtr,
                      pRawBuf, 
                      (int32_t*)&bytesRead, 
                      outBuf, 
                      outBufSize, 
                      tFlush,
                      &err);
         if (bytesNeeded > 0)
         {
             long bout =
                 fwrite((void *) outBuf, 1, bytesNeeded, outFile);
             if (bout != bytesNeeded)
             {
                 fprintf(stderr, "Wrote only %d bytes.\n", bout);
                 fclose(inFile);
                 fclose(outFile);
             }
         }

        if ((err != U_BUFFER_OVERFLOW_ERROR) && U_FAILURE(err) )
        {
#if defined(_DEBUG)
            fprintf (stderr, "Error transcoding rest of the file: (%s) %d\n", errorName(err), err);
#endif
            fclose(inFile);
            fclose(outFile);
            exit(1);
        }
        if ((bytesRead > 0) && (err !=U_ZERO_ERROR))
        {
	  if(verbose)
	    fprintf(stderr, "err=%d * read %d bytes\n", err,bytesRead);

            if (fseek(inFile, (curPos+bytesRead), SEEK_SET))
            {
                fprintf(stderr, "fileSize - Could not set the input cursor to %d (curpos=%d, bytesRead=%d)\n", curPos+bytesRead,curPos,bytesRead);
                exit(1);
            }
            curPos = ftell(inFile);
            bytesLeft = endPos - curPos;
        }
        else 
        {
            curPos = ftell(inFile);
            bytesLeft = endPos - curPos;
        }
        toRead = (RAWBUFSIZE > bytesLeft) ? bytesLeft : RAWBUFSIZE;
        if (toRead < RAWBUFSIZE) tFlush = true;
        if (err == U_BUFFER_OVERFLOW_ERROR)
            err = U_ZERO_ERROR;
    }
    ucnv_close(inConvrtr);
    delete inEncodName;
    fclose(inFile);
    fclose(outFile);
};


int getInputEncodingType(const BYTE* rawBuffer, unsigned long byteCount)
{
    //match the first four bytes of the input buffer with the encoding types available
    //checking for ASCII
    //
    if (byteCount > 5)  
    {
        if (!memcmp(rawBuffer, gXMLDecl_ASCII, 5))
        return US_ASCII;
    }
     
    //  If the count of raw bytes is less than 2, it cannot be anything
    //  we understand, so return UTF-8 as a fallback.
    //
    if (byteCount < 2)
        return  UTF_8;

    //  We know its at least two bytes, so lets check for a UTF-16 BOM. 
    //
    if ((rawBuffer[0] == 0xFE) && (rawBuffer[1] == 0xFF))
        return UTF_16B;
    else if ((rawBuffer[0] == 0xFF) && (rawBuffer[1] == 0xFE))
        return UTF_16L;
    
    //  Oh well, not one of those. So now lets see if we have at least 4
    //  bytes. If not, then we are out of ideas and can return UTF-8 as the
    //  fallback.
    //
    if (byteCount < 4)
        return OtherEncoding;
    
    //  We have at least 4 bytes. So lets check the 4 byte sequences that
    //  indicate other UTF-16 encodings.
    //
    if ((rawBuffer[0] == 0x00) || (rawBuffer[0] == 0x3C))
    {
#if 0
        //not supported encodings
        if (!memcmp(rawBuffer, gUCS4BPre, 4))
            return UCS_4B;
        else if (!memcmp(rawBuffer, gUCS4LPre, 4))
            return UCS_4L;
        else 
#endif
            if (!memcmp(rawBuffer, gUTF16BPre, 4))
            return UTF_16B;
        else if (!memcmp(rawBuffer, gUTF16LPre, 4))
            return UTF_16L;
    }
    
    //  See if we have enough bytes to possibly match the EBCDIC prefix.
    //  If so, try it.
    //
    if (!memcmp(rawBuffer, gEBCDICPre, 4))
         return EBCDIC;

    //  Does not seem to be anything we know, so go with UTF-8 to get at
    //  least through the first line and see what it really is.
    //
    return OtherEncoding;
}


long convertFirstLine( FILE* inF, char* inEncName, 
                       FILE* outF, char* outEncName, 
                       char* ptrBuf, unsigned long toRead, 
                       UChar* uBuf)
{
    //Here we read the inputFile with the specified buffer size.
    //Then convert this to ascii. then read the first line and convert to 
    //output and input encoding types and return for rest of the conversion
    //

    if (fseek(inF, 0, SEEK_SET))
    {
        fprintf(stderr, "file - Could not seek the begin pos \n");
        exit(1);
    }

    unsigned long bytesRead = fread( (void*)ptrBuf, 1, toRead, inF);

    char            tempBuf[RAWBUFSIZE];
    int             bufLength       = 0;
    long            bytesNeeded     = 0;
    UErrorCode      err             = U_ZERO_ERROR;

    bytesNeeded = ucnv_convert("ascii",
			inEncName,
			(char*) tempBuf,
			0,
			(const char*) ptrBuf,
			bytesRead,
			&err);
    
    if (err == U_BUFFER_OVERFLOW_ERROR)
    {
	    err = U_ZERO_ERROR;
    }
	else if (U_FAILURE(err))
	{
#if defined(_DEBUG)
		printf ("Error transcoding first line of input file: (%s) %d\n", errorName(err), err);
#endif
        fclose(inF);
    	fclose(outF);
        exit(1);
	}

    ucnv_convert("ascii",
			inEncName,
			(char*) tempBuf,
			bytesNeeded,
			(const char*) ptrBuf,
			bytesRead,
			&err);										 

	if (U_FAILURE(err))
	{
#if defined(_DEBUG)
		printf ("Error transcoding2 first line of input file: (%s) %d\n", errorName(err), err);
#endif
        fclose(inF);
    	fclose(outF);
        exit(1);
    } 
    else
	{
        //read the tempBuf to get the first line
        //
        char firstLineBuf[FIRSTLINEBUF];
        int tempBufLength = 0;
                
        for( bufLength = 0,  tempBufLength=0; bufLength < FIRSTLINEBUF; bufLength++, tempBufLength++)
        {
            if ((tempBufLength == 0) && ((inEncName == "utf-16be") || (inEncName == "utf-16le") || (inEncName == "utf16")) )
                tempBufLength++;
            firstLineBuf[bufLength] = (char)tempBuf[tempBufLength];
            if (tempBuf[tempBufLength] == 0x3E) {
                firstLineBuf[bufLength+1] = '\0';
                break;
            }
            
        }
        char* pFLB = new char[sizeof(firstLineBuf) +1];
        strcpy(pFLB, firstLineBuf);

        //if the file doesnot contain the version string line then its and illegal file
        //
        if (firstLineBuf[0] != 0x3C ) 
        {
              fprintf(stderr,"Illegal xml file: It doesnot contain the xml declaration statement on the first line \n");
              fclose(inF);
    	      fclose(outF);
              exit(1);
        }
     
        bool encString      = true;
        bool stdString      = true;
        bool encInsertMid   = false;
        bool encInsertLast  = false;
        bool dQuote         = true;
        char* doubleQuote   = "\"";
        char* singleQuote   = "\'";
        
        if (!strstr( (const char*)pFLB, doubleQuote))
        {
            if (!strstr( (const char*)pFLB, singleQuote))
            {
              fprintf(stderr,"Illegal xml file: It doesnot contain the approprite xml declaration \n");
              fclose(inF);
    	      fclose(outF);
              exit(1);
            }
            dQuote = false;
        }
        
        char* newString     = strstr( (const char*) pFLB, "encoding");
        char* stringWithEnc = 0;

        if (!newString)
            encString = false;
        else 
	  {
	    stringWithEnc = new char[strlen(newString)+1];
            strcpy(stringWithEnc, newString);
	  }
            
        newString = strstr( (const char*) pFLB, "standalone");
        char* stringWithStd = 0;
        if (!newString) 
            stdString = false;
        else
        {
	    stringWithStd = new char[strlen(newString)+1];
            strcpy(stringWithStd, newString);
       }
        
        if (!encString && !stdString)
             encInsertLast = true;
        if (!encString && stdString)
             encInsertMid = true;

        //Encodingname for the rest of the input file could be different. 
        //If its not specified in the  first line then assume it to be UTF8
        if (encInsertLast || encInsertMid)
        {
            //if the encoding type was found utf16 family or ebcdic and 
            // the encoding string is not present in the file then its an error
            if (!strcmp(inEncName, "utf-16be") 
                || !strcmp(inEncName, "utf-16le")
                || !strcmp(inEncName, "ebcdic-cp-us"))
            {
                fprintf(stderr, "Illegal xml file: it doesnot contain the encoding string in the first line of the input file\n");
                fclose(inF);
    	        fclose(outF);
                exit(1);
            }
            strcpy(encodingNameInFile, inEncName);
        }

        char* tempString    = " encoding=";
        char* dupFLB        = strdup(pFLB);
	int stringTwoLength = 0;

	/* build up the length */
	stringTwoLength = bufLength;

	if(tempString)
	  stringTwoLength += strlen(tempString);

	if(outEncName)
	  stringTwoLength += strlen(outEncName);

	if(stringWithStd)
	  stringTwoLength += strlen(stringWithStd);

	stringTwoLength   += 5;
	
        char* stringTwo     = new char[stringTwoLength];

        if (encInsertLast) {
            char* stringOne = new char[bufLength];
            strncpy(stringOne, pFLB, bufLength-1);
            strcpy(stringOne+bufLength-1, "");
            stringTwo = strcpy(stringTwo, stringOne);
            strcat(stringTwo, tempString);
            catString(stringTwo, dQuote);
            strcat(stringTwo, outEncName);
            catString(stringTwo, dQuote);
            strcat(stringTwo , " ?>");
            delete stringOne;
        }
        //insert the string before 'standalone' statement
        else if (encInsertMid) {
            char* stringThree = new char[bufLength + strlen(tempString) + strlen(outEncName) + 5];
            if (dQuote)         
                stringThree = strtok(dupFLB, doubleQuote);
            else
                stringThree = strtok(dupFLB, singleQuote);

            strcpy(stringTwo, stringThree);
            catString(stringTwo, dQuote);

            char* tmpString;
            if (dQuote)
                tmpString = strtok(0, doubleQuote);
            else
                tmpString = strtok(0, singleQuote);
            if (tmpString != NULL)
                strcat(stringTwo, tmpString);
            
            catString(stringTwo, dQuote);
            strcat(stringTwo, tempString);
            catString(stringTwo, dQuote);

            strcat(stringTwo, outEncName);
            if (dQuote)
                strcat(stringTwo, "\" ");
            else
                strcat(stringTwo, "\' ");
            strcat(stringTwo, stringWithStd);
            delete stringThree;
        }
        //if the encoding string is there then modify the output encoding name in it.
        else if (encString)
        {
            char* stringFive  = new char[strlen(dupFLB)+1];
                    
            if (dQuote)
                stringFive = strtok (dupFLB, doubleQuote);
            else
                stringFive = strtok (dupFLB, singleQuote);

            strcpy(stringTwo, stringFive);
            catString(stringTwo, dQuote);
            while (stringFive != NULL)
            {
                if (dQuote)
                    stringFive = strtok(0,doubleQuote);
                else
                    stringFive = strtok(0,singleQuote);

                if (stringFive == NULL)
                    break;
                strcat(stringTwo, stringFive);
                
                char* n1String = strstr(stringFive, ">");
                if (!n1String)
                    catString(stringTwo, dQuote);
                
                char* nString = strstr(stringFive, "encoding");
                if (nString) 
                {
                    strcat(stringTwo, outEncName);
                    if (dQuote)
                        stringFive = strtok(0, doubleQuote);
                    else
                        stringFive = strtok(0, singleQuote);
                    strcpy(encodingNameInFile, stringFive); //this is the encoded string name
                    catString(stringTwo, dQuote);
                }
            }
            if (stringFive != NULL)
            {
                delete stringFive;
                stringFive = 0;
            }
        }
      
        // introduce the first order bytes for utf16 be and le files
        //
        if (!strcmp(outEncName, "utf-16be") || !strcmp(outEncName, "utf16"))
        {
            uBuf[0] = 0xFE;
            fwrite( (void*) uBuf, 1, 1, outF);
            uBuf[0] = 0xFF;
            fwrite( (void*) uBuf, 1, 1, outF);
        } else if (!strcmp(outEncName , "utf-16le"))
        {
           uBuf[0] = 0xFF;
           fwrite( (void*) uBuf, 1, 1, outF);
           uBuf[0] = 0xFE;
           fwrite( (void*) uBuf, 1, 1, outF);
        }

        err = U_ZERO_ERROR;
        long oneChar = 0;
        while ( *stringTwo != '\0' )
        {
            //transcode character-by-character
            oneChar = ucnv_convert(outEncName,
			    "ascii",
			    (char*) uBuf,
			    0,
                (const char*) stringTwo,
			    1,
			    &err);
            if (err == U_BUFFER_OVERFLOW_ERROR)
            {
	            err = U_ZERO_ERROR;
            }
	        else if (U_FAILURE(err))
	        {
#if defined(_DEBUG)
		        fprintf (stderr, "Error transcoding char-by-char: (%s) %d\n", errorName(err), err);
#endif
                fclose(inF);
    	        fclose(outF);
                exit(1);
	        }

            ucnv_convert(outEncName,
			    "ascii",
			    (char*) uBuf,
			    oneChar,
                (const char*) stringTwo,
			    1,
			    &err);
	        if (U_FAILURE(err))
	        {
#if defined(_DEBUG)
		        fprintf (stderr, "Error transcoding2 char-by-char: (%s) %d\n", errorName(err), err);
#endif
                fclose(inF);
    	        fclose(outF);
                exit(1);
            } 
            fwrite( (void*) uBuf, 1, oneChar, outF);
            stringTwo++;
        }
    }
    

    //Now get the pointer offset after the first line in the input file 
    //and return this position
    //
    char* newInEncName  = new char[strlen(inEncName) +1];
    strcpy(newInEncName, inEncName);
    if (encodingNameInFile !=NULL) 
    {
        if (inEncName)
            delete newInEncName;
        newInEncName = new char[strlen(encodingNameInFile)+1];
        strcpy(newInEncName, encodingNameInFile);
    }

     char   oldBuf[RAWBUFSIZE];   
     int    bufHere    = bufLength +1;
     if (!strcmp(newInEncName, "utf-16be") || !strcmp(newInEncName, "utf16") || !strcmp(newInEncName, "utf-16le")) 
     {
         bufHere +=1;
        memcpy((void*)oldBuf, (void*) tempBuf, bufHere);
     }
     else
              memcpy((void*)oldBuf, (void*) tempBuf, bufHere);
     
     char   newBuf[RAWBUFSIZE];
     long   endBytes    = 0;    
     //transcode this ascii type to the input encoding type  
     //and get the pointer to the end of first line in the input buffer 
     //
     err = U_ZERO_ERROR;
     endBytes = ucnv_convert(newInEncName,
     		"ascii",
			(char*) newBuf,
			0,
			(const char*) oldBuf,
			bufHere,
			&err);
    
    if (err == U_BUFFER_OVERFLOW_ERROR)
    {
	    err = U_ZERO_ERROR;
    }
	else if (U_FAILURE(err))
	{
#if defined(_DEBUG)
		fprintf (stderr, "Error transcoding from ascii to input encoding: (%s) %d\n", errorName(err), err);
#endif
        fclose(inF);
    	fclose(outF);
        exit(1);
	}
    ucnv_convert(newInEncName,
        	"ascii",
			(char*) newBuf,
			endBytes,
			(const char*) oldBuf,
			bufHere,
			&err);										 
	if (U_FAILURE(err))
	{
#if defined(_DEBUG)
		fprintf (stderr, "Error transcoding2 from ascii to input encoding: (%s) %d\n", errorName(err), err);
#endif
        delete newInEncName;
        fclose(inF);
    	fclose(outF);
        exit(1);
    }
    
    return endBytes;
}


// Converts one chunk of input from the encoding of inConverter to the
// encoding of outConverter.
//
//   inBuffer / inBufSize       - input bytes and their count (the count is
//                                only read, never updated)
//   outBuffer / outBufCapacity - destination buffer and its size in bytes
//   flush                      - handed on to the underlying conversion
//   err                        - in/out error code; nothing is done when it
//                                already holds a failure
//
// Returns the number of bytes written to outBuffer, or 0 when *err already
// held a failure on entry. U_INDEX_OUTOFBOUNDS_ERROR is reported to the
// caller as U_BUFFER_OVERFLOW_ERROR.
//
int32_t  XMLUConvert( UConverter* inConverter,
                      UConverter* outConverter,
                      const char* inBuffer, 
                      int32_t* inBufSize, 
                      char* outBuffer, 
                      int32_t outBufCapacity, 
                      bool_t flush,
                      UErrorCode* err)
{
    // cursors that the conversion advances, and the limits they run up to
    const char* readCursor  = inBuffer;
    const char* readLimit   = inBuffer + *inBufSize;
    char*       writeCursor = outBuffer;
    const char* writeLimit  = outBuffer + outBufCapacity;

    if (U_FAILURE(*err))
    {
        return 0;
    }

    XMLU_fromCodepageToCodepage(outConverter, inConverter,
                                &writeCursor, writeLimit,
                                &readCursor, readLimit,
                                NULL, flush, err);

    if (*err == U_INDEX_OUTOFBOUNDS_ERROR)
    {
        *err = U_BUFFER_OVERFLOW_ERROR;
    }

    // whatever lies between the start of the buffer and the cursor was written
    return writeCursor - outBuffer;
}

// Converts bytes from the codepage of inConverter to the codepage of
// outConverter, going through an intermediate buffer of RAWBUFSIZE UChars.
//
//   target / targetLimit - output cursor (advanced) and end of the output
//   source / sourceLimit - input cursor (advanced) and end of the input
//   offsets              - not used
//   flush                - handed on to ucnv_toUnicode()
//   err                  - in/out error code; nothing is done when it already
//                          holds a failure
//
// Loops until the input is consumed or an error is reported:
// first from the inConverter codepage to Unicode,
// then from Unicode to the outConverter codepage.
//
void XMLU_fromCodepageToCodepage(    UConverter*    outConverter,
                        UConverter*    inConverter,
                        char**         target,
                        const char*    targetLimit,
                        const char**   source,
                        const char*    sourceLimit,
                        int32_t*       offsets,
                        bool_t         flush,
                        UErrorCode*    err)
{
  UChar pivot[RAWBUFSIZE];
  const UChar *pivotLimit = pivot + RAWBUFSIZE;

  if (U_FAILURE (*err))
    return;

  for (;;)
    {
      if ((*source == sourceLimit) || !(U_SUCCESS (*err)))
        break;

      /* fill the intermediate buffer from the input */
      UChar *pivotEnd = pivot;
      ucnv_toUnicode (inConverter,
                      &pivotEnd,
                      pivotLimit,
                      source,
                      sourceLimit,
                      NULL,
                      flush,
                      err);

      /*U_INDEX_OUTOFBOUNDS_ERROR means that the intermediate buffer is full
       *we will require at least another loop (it's a recoverable error);
       *any other failure ends the conversion
       */
      if (!(U_SUCCESS (*err)) && (*err != U_INDEX_OUTOFBOUNDS_ERROR))
        break;

      *err = U_ZERO_ERROR;

      /* drain the intermediate buffer into the output */
      UChar const *pivotPos = pivot;
      while ((pivotPos != pivotEnd) && U_SUCCESS (*err))
        {
          ucnv_fromUnicode (outConverter,
                            target,
                            targetLimit,
                            &pivotPos,
                            pivotEnd,
                            NULL,
                            TRUE,
                            err);
        }
    }
}

// Appends one quote character to thisString: a double quote when quote is
// true, a single quote otherwise. thisString must have room for one more
// character.
void catString(char* thisString, bool quote)
{
    const char* mark = quote ? "\"" : "\'";
    strcat(thisString, mark);
}