scuffed-code/icu4c/source/common/ucnvlat1.c

/* 
**********************************************************************
*   Copyright (C) 2000, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   file name:  ucnvlat1.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2000feb07
*   created by: Markus W. Scherer
*/

#include "unicode/utypes.h"
#include "unicode/ucnv.h"
#include "unicode/ucnv_err.h"
#include "ucnv_bld.h"
#include "ucnv_cnv.h"

/* ISO 8859-1 --------------------------------------------------------------- */

/*
 * ISO 8859-1 to Unicode: a table-less and callback-less version of
 * _MBCSSingleToBMPWithOffsets().
 *
 * Every source byte is widened to one UChar. The number of units converted
 * is the smaller of the remaining source length and the remaining target
 * capacity; if the source is longer than the target, *pErrorCode is set to
 * U_BUFFER_OVERFLOW_ERROR. pArgs->source and pArgs->target are advanced past
 * the converted units. If pArgs->offsets is not NULL, it receives 0, 1, 2, ...
 * for each converted unit and is advanced as well.
 */
U_CFUNC void
_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                            UErrorCode *pErrorCode) {
    const uint8_t *in=(const uint8_t *)pArgs->source;
    UChar *out=pArgs->target;
    int32_t room=pArgs->targetLimit-pArgs->target;
    int32_t count=(const uint8_t *)pArgs->sourceLimit-in;
    int32_t *offs;
    int32_t i;

    /* 1:1 byte-to-UChar mapping, so one counter covers both buffers */
    if(count>room) {
        /* the target cannot take all of the input */
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
        count=room;
    }

    /* copy, zero-extending each byte (i stays 0 if count is not positive) */
    for(i=0; i<count; ++i) {
        out[i]=in[i];
    }

    /* publish how far we got */
    pArgs->source=(const char *)(in+i);
    pArgs->target=out+i;

    /* one offset per output unit, equal to its index in this source buffer */
    offs=pArgs->offsets;
    if(offs!=NULL) {
        for(i=0; i<count; ++i) {
            offs[i]=i;
        }
        pArgs->offsets=offs+i;
    }
}

/*
 * ISO 8859-1 single-character fetch: a table-less and callback-less version
 * of _MBCSSingleGetNextUChar().
 *
 * Returns the next source byte as a code point and advances pArgs->source by
 * one. If the source is exhausted, sets *pErrorCode to
 * U_INDEX_OUTOFBOUNDS_ERROR and returns 0xffff.
 */
U_CFUNC UChar32
_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
                    UErrorCode *pErrorCode) {
    const uint8_t *cursor=(const uint8_t *)pArgs->source;

    if(cursor>=(const uint8_t *)pArgs->sourceLimit) {
        /* nothing left to read, so there is no output */
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0xffff;
    }

    /* consume exactly one byte; uint8_t guarantees zero-extension */
    pArgs->source=(const char *)(cursor+1);
    return *cursor;
}

/*
 * This is a table-less version of _MBCSSingleFromBMPWithOffsets().
 *
 * Converts UTF-16 code units from pArgs->source into single bytes at
 * pArgs->target. The function is shared by two converters: when
 * cnv->sharedData is &_Latin1Data, code points up to 0xff are written
 * directly; otherwise (US-ASCII) only code points up to 0x7f are.
 *
 * Anything above that limit is handed to cnv->fromUCharErrorBehaviour:
 *   - non-surrogates and matched surrogate pairs with reason UCNV_UNASSIGNED
 *     and U_INVALID_CHAR_FOUND,
 *   - unmatched lead or trail surrogates with reason UCNV_ILLEGAL and
 *     U_ILLEGAL_CHAR_FOUND.
 *
 * A lead surrogate at the very end of the input is kept in
 * cnv->fromUSurrogateLead for the next call, unless pArgs->flush is set, in
 * which case U_TRUNCATED_CHAR_FOUND is reported and the state is cleared.
 *
 * On return pArgs->source, pArgs->target and pArgs->offsets are updated.
 * *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR when input remains but the
 * target is full, or when the callback left bytes in the converter's
 * charErrorBuffer.
 */
U_CFUNC void
_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                              UErrorCode *pErrorCode) {
    UConverter *cnv;
    const UChar *source, *sourceLimit, *lastSource;
    uint8_t *target;
    int32_t targetCapacity, length;
    int32_t *offsets;

    UChar32 c, max;

    int32_t sourceIndex;

    UConverterCallbackReason reason;
    int32_t i;

    /* set up the local pointers */
    cnv=pArgs->converter;
    source=pArgs->source;
    sourceLimit=pArgs->sourceLimit;
    target=(uint8_t *)pArgs->target;
    targetCapacity=pArgs->targetLimit-pArgs->target;
    offsets=pArgs->offsets;

    /* the highest code point that maps directly to a byte depends on the converter */
    if(cnv->sharedData==&_Latin1Data) {
        max=0xff; /* Latin-1 */
    } else {
        max=0x7f; /* US-ASCII */
    }

    /* get the converter state from UConverter */
    /* c!=0 means a lead surrogate is pending from an earlier call */
    c=cnv->fromUSurrogateLead;

    /* sourceIndex=-1 if the current character began in the previous buffer */
    sourceIndex= c==0 ? 0 : -1;
    /* lastSource marks the first source unit whose offset has not been written yet */
    lastSource=source;

    /*
     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
     * for the minimum of the sourceLength and targetCapacity
     */
    length=sourceLimit-source;
    if(length<targetCapacity) {
        targetCapacity=length;
    }

    /* conversion loop */
    /* resume a pending lead surrogate by jumping straight to the trail check inside the loop */
    if(c!=0 && targetCapacity>0) {
        goto getTrail;
    }

    while(targetCapacity>0) {
        /*
         * Get a correct Unicode code point:
         * a single UChar for a BMP code point or
         * a matched surrogate pair for a "surrogate code point".
         */
        c=*source++;
        if(c<=max) {
            /* convert the Unicode code point */
            *target++=(uint8_t)c;
            --targetCapacity;

            /* normal end of conversion: prepare for a new character */
            c=0;
        } else {
            /* classify the unconvertible input and pick the callback reason/error code */
            if(!UTF_IS_SURROGATE(c)) {
                /* callback(unassigned) */
                reason=UCNV_UNASSIGNED;
                *pErrorCode=U_INVALID_CHAR_FOUND;
            } else if(UTF_IS_SURROGATE_FIRST(c)) {
getTrail:
                if(source<sourceLimit) {
                    /* test the following code unit */
                    UChar trail=*source;
                    if(UTF_IS_SECOND_SURROGATE(trail)) {
                        ++source;
                        c=UTF16_GET_PAIR_VALUE(c, trail);
                        /* this codepage does not map supplementary code points */
                        /* callback(unassigned) */
                        reason=UCNV_UNASSIGNED;
                        *pErrorCode=U_INVALID_CHAR_FOUND;
                    } else {
                        /* this is an unmatched lead code unit (1st surrogate) */
                        /* callback(illegal) */
                        reason=UCNV_ILLEGAL;
                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                    }
                } else {
                    /* no more input */
                    /* c still holds the lead surrogate; it is stored or reported after the loop */
                    break;
                }
            } else {
                /* this is an unmatched trail code unit (2nd surrogate) */
                /* callback(illegal) */
                reason=UCNV_ILLEGAL;
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            }

            /* call the callback function with all the preparations and post-processing */
            /* get the number of code units for c to correctly advance sourceIndex after the callback call */
            length=UTF_CHAR_LENGTH(c);

            /* set offsets since the start or the last callback */
            if(offsets!=NULL) {
                int32_t count=(int32_t)(source-lastSource);

                /* do not set the offset for the callback-causing character */
                count-=length;

                /* count is not positive when the lead surrogate came from a previous buffer */
                while(count>0) {
                    *offsets++=sourceIndex++;
                    --count;
                }
                /* offset and sourceIndex are now set for the current character */
            }

            /* update the arguments structure */
            /* the callback receives pArgs and may move source, target and offsets itself */
            pArgs->source=source;
            pArgs->target=(char *)target;
            pArgs->offsets=offsets;

            /* set the converter state in UConverter to deal with the next character */
            cnv->fromUSurrogateLead=0;

            /* write the code point as code units */
            i=0;
            UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c);
            cnv->invalidUCharLength=(int8_t)i;
            /* i==length */

            /* call the callback function */
            cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, reason, pErrorCode);

            /* get the converter state from UConverter */
            c=cnv->fromUSurrogateLead;

            /* update target and deal with offsets if necessary */
            /* the second argument is the number of bytes the callback added to the target */
            offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);
            target=(uint8_t *)pArgs->target;

            /* update the source pointer and index */
            /* skip the offending character plus whatever extra input the callback consumed */
            sourceIndex+=length+(pArgs->source-source);
            source=lastSource=pArgs->source;
            /* recompute the loop counter as min(remaining source, remaining target) */
            targetCapacity=(uint8_t *)pArgs->targetLimit-target;
            length=sourceLimit-source;
            if(length<targetCapacity) {
                targetCapacity=length;
            }

            /*
             * If the callback overflowed the target, then we need to
             * stop here with an overflow indication.
             */
            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
                /* c is left as read back from the converter so the state is preserved */
                break;
            } else if(U_FAILURE(*pErrorCode)) {
                /* break on error */
                c=0;
                break;
            } else if(cnv->charErrorBufferLength>0) {
                /* target is full */
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                break;
            }
        }
    }

    /* the loop counter ran out: report overflow if that was due to the target, not the source */
    if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
        /* target is full */
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    }

    /* set offsets since the start or the last callback */
    /*
     * NOTE(review): when the loop exits at "no more input" with a lead
     * surrogate consumed, source-lastSource also counts that code unit although
     * no target byte was written for it -- confirm that offsets stays in step
     * with target in that case.
     */
    if(offsets!=NULL) {
        size_t count=source-lastSource;
        while(count>0) {
            *offsets++=sourceIndex++;
            --count;
        }
    }

    if(pArgs->flush && source>=sourceLimit) {
        /* reset the state for the next conversion */
        if(c!=0 && U_SUCCESS(*pErrorCode)) {
            /* a Unicode code point remains incomplete (only a first surrogate) */
            *pErrorCode=U_TRUNCATED_CHAR_FOUND;
        }
        cnv->fromUSurrogateLead=0;
    } else {
        /* set the converter state back into UConverter */
        cnv->fromUSurrogateLead=(UChar)c;
    }

    /* write back the updated pointers */
    pArgs->source=source;
    pArgs->target=(char *)target;
    pArgs->offsets=offsets;
}

/*
 * Implementation table for the ISO 8859-1 converter: the converter type
 * followed by function-pointer slots. Slots left NULL are not implemented
 * by this converter.
 * NOTE(review): each conversion routine is listed twice; presumably the paired
 * slots are the plain and the with-offsets entry points -- confirm against
 * the UConverterImpl declaration.
 */
static const UConverterImpl _Latin1Impl={
    UCNV_LATIN_1,

    NULL,
    NULL,

    NULL,
    NULL,
    NULL,

    /* to-Unicode, twice */
    _Latin1ToUnicodeWithOffsets,
    _Latin1ToUnicodeWithOffsets,
    /* from-Unicode, twice */
    _Latin1FromUnicodeWithOffsets,
    _Latin1FromUnicodeWithOffsets,
    /* single code point fetch */
    _Latin1GetNextUChar,

    NULL,
    NULL
};

/*
 * Static description of the ISO 8859-1 converter, named "LATIN_1".
 * NOTE(review): field meanings are not visible in this file. 819 is presumably
 * the IBM code page number, the two 1s the min/max bytes per character, and
 * { 0x1a, ... } with length 1 the substitution byte -- confirm against the
 * UConverterStaticData declaration.
 */
static const UConverterStaticData _Latin1StaticData={
    sizeof(UConverterStaticData),
    "LATIN_1",
    819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
    { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};

/*
 * Shared data object for the ISO 8859-1 converter; it ties together
 * _Latin1StaticData and _Latin1Impl. It has external linkage, and
 * _Latin1FromUnicodeWithOffsets() compares cnv->sharedData against its
 * address to tell Latin-1 apart from US-ASCII.
 * NOTE(review): the meaning of the remaining fields (~0, NULL, FALSE, 0) is
 * not visible here -- see the UConverterSharedData declaration.
 */
const UConverterSharedData _Latin1Data={
    sizeof(UConverterSharedData), ~((uint32_t) 0),
    NULL, NULL, &_Latin1StaticData, FALSE, &_Latin1Impl, 
    0
};

/* US-ASCII ----------------------------------------------------------------- */

/*
 * US-ASCII to Unicode: a table-less version of _MBCSSingleToBMPWithOffsets().
 *
 * Bytes 0..0x7f are widened to one UChar each. Any other byte is reported as
 * U_ILLEGAL_CHAR_FOUND / UCNV_ILLEGAL to the converter's
 * fromCharErrorBehaviour callback, after which conversion continues unless
 * the callback left an error or overflowed the target.
 *
 * On return pArgs->source, pArgs->target and pArgs->offsets are updated.
 * *pErrorCode becomes U_BUFFER_OVERFLOW_ERROR if input remains while the
 * target is full, or if the callback left data in UCharErrorBuffer.
 */
U_CFUNC void
_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                           UErrorCode *pErrorCode) {
    const uint8_t *in=(const uint8_t *)pArgs->source;
    const uint8_t *inLimit=(const uint8_t *)pArgs->sourceLimit;
    const uint8_t *runStart=in;     /* first byte whose offset is still unwritten */
    UChar *out=pArgs->target;
    int32_t room=pArgs->targetLimit-pArgs->target;
    int32_t *offs=pArgs->offsets;
    int32_t inIndex=0;              /* source index of the byte at runStart */
    int32_t remaining;

    /*
     * 1:1 byte-to-UChar mapping: a single counter, the smaller of the
     * remaining source length and the remaining target capacity, drives the loop
     */
    remaining=inLimit-in;
    if(remaining<room) {
        room=remaining;
    }

    while(room>0) {
        UConverter *cnv;
        uint8_t byte=*in++;

        if(byte<=0x7f) {
            /* fast path: plain ASCII */
            *out++=byte;
            --room;
            continue;
        }

        /* illegal byte: prepare everything for the callback */
        cnv=pArgs->converter;
        *pErrorCode=U_ILLEGAL_CHAR_FOUND;

        /* flush offsets for the good bytes before this one (not for the bad byte itself) */
        if(offs!=NULL) {
            int32_t pending;

            for(pending=(int32_t)(in-runStart)-1; pending>0; --pending) {
                *offs++=inIndex++;
            }
            /* inIndex now refers to the offending byte */
        }

        /* let the callback see the current positions */
        pArgs->source=(const char *)in;
        pArgs->target=out;
        pArgs->offsets=offs;

        /* hand the offending byte over in invalidCharBuffer */
        cnv->invalidCharBuffer[0]=byte;
        cnv->invalidCharLength=1;

        cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, 1, UCNV_ILLEGAL, pErrorCode);

        /* account for whatever the callback wrote to the target */
        offs=ucnv_updateCallbackOffsets(offs, pArgs->target-out, inIndex);
        out=pArgs->target;

        /* step over the bad byte plus any input the callback consumed */
        inIndex+=1+((const uint8_t *)pArgs->source-in);
        in=runStart=(const uint8_t *)pArgs->source;

        /* recompute the loop counter from the new positions */
        room=pArgs->targetLimit-out;
        remaining=inLimit-in;
        if(remaining<room) {
            room=remaining;
        }

        /* stop on callback overflow or any other callback failure */
        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || U_FAILURE(*pErrorCode)) {
            break;
        }
        if(cnv->UCharErrorBufferLength>0) {
            /* the callback output did not fit: target is full */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            break;
        }
    }

    /* input left over and no room in the target */
    if(U_SUCCESS(*pErrorCode) && in<inLimit && out>=pArgs->targetLimit) {
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    }

    /* offsets for everything converted since the start or the last callback */
    if(offs!=NULL) {
        size_t tail;

        for(tail=in-runStart; tail>0; --tail) {
            *offs++=inIndex++;
        }
    }

    /* publish the final positions */
    pArgs->source=(const char *)in;
    pArgs->target=out;
    pArgs->offsets=offs;
}

/*
 * US-ASCII single-character fetch: a table-less version of
 * _MBCSSingleGetNextUChar().
 *
 * Returns the next byte if it is in 0..0x7f. Any other byte is passed to the
 * converter's fromCharErrorBehaviour callback with a local scratch buffer as
 * target; if the callback produced output, the first code point of that
 * output is returned via ucnv_getUChar32KeepOverflow(), and if it produced
 * nothing the next byte is tried. When the input runs out,
 * U_INDEX_OUTOFBOUNDS_ERROR is set and 0xffff is returned; 0xffff is also
 * returned when the callback leaves an error other than overflow.
 *
 * Note that once a callback has been invoked, pArgs->target and
 * pArgs->targetLimit are left pointing into the local scratch buffer.
 */
U_CFUNC UChar32
_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
                   UErrorCode *pErrorCode) {
    UChar scratch[UTF_MAX_CHAR_LENGTH];
    const uint8_t *cursor=(const uint8_t *)pArgs->source;

    for(;;) {
        UConverter *cnv;
        uint8_t byte;

        if(cursor>=(const uint8_t *)pArgs->sourceLimit) {
            /* ran out of input (possibly after skipping callbacks) */
            break;
        }

        /* consume one byte and record that immediately */
        byte=*cursor++;
        pArgs->source=(const char *)cursor;

        if(byte<=0x7f) {
            return byte;
        }

        /* illegal byte: route it through the callback */
        cnv=pArgs->converter;
        *pErrorCode=U_ILLEGAL_CHAR_FOUND;

        /* callback output goes into the local scratch buffer */
        pArgs->target=scratch;
        pArgs->targetLimit=scratch+UTF_MAX_CHAR_LENGTH;

        /* hand the offending byte over in invalidCharBuffer */
        cnv->invalidCharBuffer[0]=(char)byte;
        cnv->invalidCharLength=1;

        cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, 1, UCNV_ILLEGAL, pErrorCode);

        /* the callback may have moved the source pointer */
        cursor=(const uint8_t *)pArgs->source;

        if(U_SUCCESS(*pErrorCode)) {
            int32_t written=pArgs->target-scratch;

            if(written>0) {
                /* return the first character the callback wrote */
                return ucnv_getUChar32KeepOverflow(cnv, scratch, written);
            }
            /* the callback wrote nothing: go on with the next byte */
        } else {
            if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
                /* scratch is full; clear the overflow and return its first character */
                *pErrorCode=U_ZERO_ERROR;
                return ucnv_getUChar32KeepOverflow(cnv, scratch, UTF_MAX_CHAR_LENGTH);
            }
            /* any other error ends the call */
            /* ### what if a callback set an error but _also_ generated output?! */
            return 0xffff;
        }
    }

    /* no output because of empty input or only skipping callbacks */
    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
    return 0xffff;
}

/*
 * Implementation table for the US-ASCII converter: the converter type
 * followed by function-pointer slots. Slots left NULL are not implemented
 * by this converter. The from-Unicode direction reuses
 * _Latin1FromUnicodeWithOffsets(), which limits output to 0x7f whenever the
 * converter's shared data is not _Latin1Data.
 * NOTE(review): each conversion routine is listed twice; presumably the paired
 * slots are the plain and the with-offsets entry points -- confirm against
 * the UConverterImpl declaration.
 */
static const UConverterImpl _ASCIIImpl={
    UCNV_US_ASCII,

    NULL,
    NULL,

    NULL,
    NULL,
    NULL,

    /* to-Unicode, twice */
    _ASCIIToUnicodeWithOffsets,
    _ASCIIToUnicodeWithOffsets,
    /* from-Unicode, twice (shared with Latin-1) */
    _Latin1FromUnicodeWithOffsets,
    _Latin1FromUnicodeWithOffsets,
    /* single code point fetch */
    _ASCIIGetNextUChar,

    NULL,
    NULL
};

/*
 * Static description of the US-ASCII converter, named "US-ASCII".
 * NOTE(review): field meanings are not visible in this file. 367 is presumably
 * the IBM code page number, the two 1s the min/max bytes per character, and
 * { 0x1a, ... } with length 1 the substitution byte -- confirm against the
 * UConverterStaticData declaration.
 */
static const UConverterStaticData _ASCIIStaticData={
    sizeof(UConverterStaticData),
    "US-ASCII",
    367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
    { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
    0,
    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};

/*
 * Shared data object for the US-ASCII converter; it ties together
 * _ASCIIStaticData and _ASCIIImpl and has external linkage.
 * NOTE(review): the meaning of the remaining fields (~0, NULL, FALSE, 0) is
 * not visible here -- see the UConverterSharedData declaration.
 */
const UConverterSharedData _ASCIIData={
    sizeof(UConverterSharedData), ~((uint32_t) 0),
    NULL, NULL, &_ASCIIStaticData, FALSE, &_ASCIIImpl, 
    0
};
ICU-311 Major rewrite of the internal codepath to use the UConverterToUnicodeArgs and UConverterFromUnicodeArgs. X-SVN-Rev: 1777 2000-07-10 20:51:54 +00:00			`/*`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00			`**********************************************************************`
			`* Copyright (C) 2000, International Business Machines`
			`* Corporation and others. All Rights Reserved.`
			`**********************************************************************`
			`* file name: ucnvlat1.cpp`
			`* encoding: US-ASCII`
			`* tab size: 8 (not used)`
			`* indentation:4`
			`*`
			`* created on: 2000feb07`
			`* created by: Markus W. Scherer`
			`*/`

			`#include "unicode/utypes.h"`
ICU-484 reimplement LATIN_1 to work correctly with offsets and UTF-16 X-SVN-Rev: 3285 2000-12-20 01:22:02 +00:00			`#include "unicode/ucnv.h"`
ICU-311 Major rewrite of the callback APIs. Needs to be checked in without more testing to make the 6/29/2000 API deadline. Lots of warnings still, offset handling is not implemented. More unit tests is required because the current implementation actually passes the tests. X-SVN-Rev: 1661 2000-06-27 20:47:56 +00:00			`#include "unicode/ucnv_err.h"`
			`#include "ucnv_bld.h"`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00			`#include "ucnv_cnv.h"`

			`/* ISO 8859-1 --------------------------------------------------------------- */`

ICU-484 reimplement LATIN_1 to work correctly with offsets and UTF-16 X-SVN-Rev: 3285 2000-12-20 01:22:02 +00:00			`/* This is a table-less and callback-less version of _MBCSSingleToBMPWithOffsets(). */`
			`U_CFUNC void`
			`_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,`
			`UErrorCode *pErrorCode) {`
			`const uint8_t *source;`
			`UChar *target;`
			`int32_t targetCapacity, length;`
			`int32_t *offsets;`

			`/* set up the local pointers */`
			`source=(const uint8_t *)pArgs->source;`
			`target=pArgs->target;`
			`targetCapacity=pArgs->targetLimit-pArgs->target;`

			`/*`
			`* since the conversion here is 1:1 UChar:uint8_t, we need only one counter`
			`* for the minimum of the sourceLength and targetCapacity`
			`*/`
			`length=(const uint8_t *)pArgs->sourceLimit-source;`
			`if(length<=targetCapacity) {`
			`targetCapacity=length;`
			`} else {`
			`/* target will be full */`
			`*pErrorCode=U_BUFFER_OVERFLOW_ERROR;`
			`length=targetCapacity;`
			`}`

			`/* conversion loop */`
			`while(targetCapacity>0) {`
			`target++=source++;`
			`--targetCapacity;`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00			`}`
ICU-484 reimplement LATIN_1 to work correctly with offsets and UTF-16 X-SVN-Rev: 3285 2000-12-20 01:22:02 +00:00
			`/* write back the updated pointers */`
			`pArgs->source=(const char *)source;`
			`pArgs->target=target;`

			`/* set offsets */`
			`offsets=pArgs->offsets;`
			`if(offsets!=NULL) {`
			`int32_t sourceIndex=0;`

			`while(length>0) {`
			`*offsets++=sourceIndex++;`
			`--length;`
			`}`
			`pArgs->offsets=offsets;`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00			`}`
ICU-484 reimplement LATIN_1 to work correctly with offsets and UTF-16 X-SVN-Rev: 3285 2000-12-20 01:22:02 +00:00			`}`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00
ICU-484 reimplement LATIN_1 to work correctly with offsets and UTF-16 X-SVN-Rev: 3285 2000-12-20 01:22:02 +00:00			`/* This is a table-less and callback-less version of _MBCSSingleGetNextUChar(). */`
			`U_CFUNC UChar32`
			`_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,`
			`UErrorCode *pErrorCode) {`
			`const uint8_t source=(const uint8_t )pArgs->source;`
			`if(source<(const uint8_t *)pArgs->sourceLimit) {`
			`pArgs->source=(const char *)(source+1);`
			`return *source;`
			`}`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00
ICU-484 reimplement LATIN_1 to work correctly with offsets and UTF-16 X-SVN-Rev: 3285 2000-12-20 01:22:02 +00:00			`/* no output because of empty input */`
			`*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;`
			`return 0xffff;`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00			`}`

ICU-484 reimplement LATIN_1 to work correctly with offsets and UTF-16 X-SVN-Rev: 3285 2000-12-20 01:22:02 +00:00			`/* This is a table-less version of _MBCSSingleFromBMPWithOffsets(). */`
			`U_CFUNC void`
			`_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,`
			`UErrorCode *pErrorCode) {`
			`UConverter *cnv;`
			`const UChar source, sourceLimit, *lastSource;`
			`uint8_t *target;`
			`int32_t targetCapacity, length;`
			`int32_t *offsets;`

			`UChar32 c, max;`

			`int32_t sourceIndex;`

			`UConverterCallbackReason reason;`
			`int32_t i;`

			`/* set up the local pointers */`
			`cnv=pArgs->converter;`
			`source=pArgs->source;`
			`sourceLimit=pArgs->sourceLimit;`
			`target=(uint8_t *)pArgs->target;`
			`targetCapacity=pArgs->targetLimit-pArgs->target;`
			`offsets=pArgs->offsets;`

ICU-705 add algorithmic US-ASCII converter X-SVN-Rev: 3286 2000-12-20 02:08:39 +00:00			`if(cnv->sharedData==&_Latin1Data) {`
			`max=0xff; /* Latin-1 */`
			`} else {`
			`max=0x7f; /* US-ASCII */`
			`}`
ICU-484 reimplement LATIN_1 to work correctly with offsets and UTF-16 X-SVN-Rev: 3285 2000-12-20 01:22:02 +00:00
			`/* get the converter state from UConverter */`
			`c=cnv->fromUSurrogateLead;`

			`/* sourceIndex=-1 if the current character began in the previous buffer */`
			`sourceIndex= c==0 ? 0 : -1;`
			`lastSource=source;`

			`/*`
			`* since the conversion here is 1:1 UChar:uint8_t, we need only one counter`
			`* for the minimum of the sourceLength and targetCapacity`
			`*/`
			`length=sourceLimit-source;`
			`if(length<targetCapacity) {`
			`targetCapacity=length;`
			`}`

			`/* conversion loop */`
			`if(c!=0 && targetCapacity>0) {`
			`goto getTrail;`
			`}`

			`while(targetCapacity>0) {`
			`/*`
			`* Get a correct Unicode code point:`
			`* a single UChar for a BMP code point or`
			`* a matched surrogate pair for a "surrogate code point".`
			`*/`
			`c=*source++;`
			`if(c<=max) {`
			`/* convert the Unicode code point */`
			`*target++=(uint8_t)c;`
			`--targetCapacity;`

			`/* normal end of conversion: prepare for a new character */`
			`c=0;`
			`} else {`
			`if(!UTF_IS_SURROGATE(c)) {`
			`/* callback(unassigned) */`
			`reason=UCNV_UNASSIGNED;`
			`*pErrorCode=U_INVALID_CHAR_FOUND;`
			`} else if(UTF_IS_SURROGATE_FIRST(c)) {`
			`getTrail:`
			`if(source<sourceLimit) {`
			`/* test the following code unit */`
			`UChar trail=*source;`
			`if(UTF_IS_SECOND_SURROGATE(trail)) {`
			`++source;`
			`c=UTF16_GET_PAIR_VALUE(c, trail);`
			`/* this codepage does not map supplementary code points */`
			`/* callback(unassigned) */`
			`reason=UCNV_UNASSIGNED;`
			`*pErrorCode=U_INVALID_CHAR_FOUND;`
			`} else {`
			`/* this is an unmatched lead code unit (1st surrogate) */`
			`/* callback(illegal) */`
			`reason=UCNV_ILLEGAL;`
			`*pErrorCode=U_ILLEGAL_CHAR_FOUND;`
			`}`
			`} else {`
			`/* no more input */`
			`break;`
			`}`
			`} else {`
			`/* this is an unmatched trail code unit (2nd surrogate) */`
			`/* callback(illegal) */`
			`reason=UCNV_ILLEGAL;`
			`*pErrorCode=U_ILLEGAL_CHAR_FOUND;`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00			`}`
ICU-484 reimplement LATIN_1 to work correctly with offsets and UTF-16 X-SVN-Rev: 3285 2000-12-20 01:22:02 +00:00
			`/* call the callback function with all the preparations and post-processing */`
			`/* get the number of code units for c to correctly advance sourceIndex after the callback call */`
			`length=UTF_CHAR_LENGTH(c);`

			`/* set offsets since the start or the last callback */`
			`if(offsets!=NULL) {`
			`int32_t count=(int32_t)(source-lastSource);`

			`/* do not set the offset for the callback-causing character */`
			`count-=length;`

			`while(count>0) {`
			`*offsets++=sourceIndex++;`
			`--count;`
			`}`
			`/* offset and sourceIndex are now set for the current character */`
			`}`

			`/* update the arguments structure */`
			`pArgs->source=source;`
			`pArgs->target=(char *)target;`
			`pArgs->offsets=offsets;`

			`/* set the converter state in UConverter to deal with the next character */`
			`cnv->fromUSurrogateLead=0;`

			`/* write the code point as code units */`
			`i=0;`
			`UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c);`
			`cnv->invalidUCharLength=(int8_t)i;`
			`/* i==length */`

			`/* call the callback function */`
			`cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, reason, pErrorCode);`

			`/* get the converter state from UConverter */`
			`c=cnv->fromUSurrogateLead;`

			`/* update target and deal with offsets if necessary */`
			`offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);`
			`target=(uint8_t *)pArgs->target;`

			`/* update the source pointer and index */`
			`sourceIndex+=length+(pArgs->source-source);`
			`source=lastSource=pArgs->source;`
			`targetCapacity=(uint8_t *)pArgs->targetLimit-target;`
			`length=sourceLimit-source;`
			`if(length<targetCapacity) {`
			`targetCapacity=length;`
			`}`

			`/*`
			`* If the callback overflowed the target, then we need to`
			`* stop here with an overflow indication.`
			`*/`
			`if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {`
			`break;`
			`} else if(U_FAILURE(*pErrorCode)) {`
			`/* break on error */`
			`c=0;`
			`break;`
			`} else if(cnv->charErrorBufferLength>0) {`
			`/* target is full */`
			`*pErrorCode=U_BUFFER_OVERFLOW_ERROR;`
			`break;`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00			`}`
			`}`
			`}`

ICU-484 reimplement LATIN_1 to work correctly with offsets and UTF-16 X-SVN-Rev: 3285 2000-12-20 01:22:02 +00:00			`if(U_SUCCESS(pErrorCode) && source<sourceLimit && target>=(uint8_t )pArgs->targetLimit) {`
			`/* target is full */`
			`*pErrorCode=U_BUFFER_OVERFLOW_ERROR;`
			`}`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00
ICU-484 reimplement LATIN_1 to work correctly with offsets and UTF-16 X-SVN-Rev: 3285 2000-12-20 01:22:02 +00:00			`/* set offsets since the start or the last callback */`
			`if(offsets!=NULL) {`
			`size_t count=source-lastSource;`
			`while(count>0) {`
			`*offsets++=sourceIndex++;`
			`--count;`
			`}`
			`}`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00
ICU-484 reimplement LATIN_1 to work correctly with offsets and UTF-16 X-SVN-Rev: 3285 2000-12-20 01:22:02 +00:00			`if(pArgs->flush && source>=sourceLimit) {`
			`/* reset the state for the next conversion */`
			`if(c!=0 && U_SUCCESS(*pErrorCode)) {`
			`/* a Unicode code point remains incomplete (only a first surrogate) */`
			`*pErrorCode=U_TRUNCATED_CHAR_FOUND;`
			`}`
			`cnv->fromUSurrogateLead=0;`
			`} else {`
			`/* set the converter state back into UConverter */`
			`cnv->fromUSurrogateLead=(UChar)c;`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00			`}`
ICU-418 fix latin-1 ucnv_getNextUChar() - zero-extend, not sign-extend, input byte X-SVN-Rev: 1543 2000-06-02 00:05:22 +00:00
ICU-484 reimplement LATIN_1 to work correctly with offsets and UTF-16 X-SVN-Rev: 3285 2000-12-20 01:22:02 +00:00			`/* write back the updated pointers */`
			`pArgs->source=source;`
			`pArgs->target=(char *)target;`
			`pArgs->offsets=offsets;`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00			`}`

ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 736 2000-02-09 19:15:17 +00:00			`static const UConverterImpl _Latin1Impl={`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00			`UCNV_LATIN_1,`

ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 734 2000-02-08 23:41:16 +00:00			`NULL,`
			`NULL,`

			`NULL,`
			`NULL,`
			`NULL,`

ICU-484 reimplement LATIN_1 to work correctly with offsets and UTF-16 X-SVN-Rev: 3285 2000-12-20 01:22:02 +00:00			`_Latin1ToUnicodeWithOffsets,`
			`_Latin1ToUnicodeWithOffsets,`
			`_Latin1FromUnicodeWithOffsets,`
			`_Latin1FromUnicodeWithOffsets,`
			`_Latin1GetNextUChar,`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 736 2000-02-09 19:15:17 +00:00
ICU-535 Fixed compiler warnings X-SVN-Rev: 3006 2000-11-27 17:40:41 +00:00			`NULL,`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 736 2000-02-09 19:15:17 +00:00			`NULL`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00			`};`

ICU-705 add algorithmic US-ASCII converter X-SVN-Rev: 3286 2000-12-20 02:08:39 +00:00			`static const UConverterStaticData _Latin1StaticData={`
ICU-484 add unicodeMask to UConverterStaticData for optimized implementations X-SVN-Rev: 3280 2000-12-19 23:07:50 +00:00			`sizeof(UConverterStaticData),`
			`"LATIN_1",`
ICU-267 make CNV file format portable. (+ makeconv cleanup) X-SVN-Rev: 1176 2000-04-19 23:05:27 +00:00			`819, UCNV_IBM, UCNV_LATIN_1, 1, 1,`
ICU-705 add algorithmic US-ASCII converter X-SVN-Rev: 3286 2000-12-20 02:08:39 +00:00			`{ 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,`
ICU-484 add unicodeMask to UConverterStaticData for optimized implementations X-SVN-Rev: 3280 2000-12-19 23:07:50 +00:00			`0,`
			`{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */`
ICU-267 make CNV file format portable. (+ makeconv cleanup) X-SVN-Rev: 1176 2000-04-19 23:05:27 +00:00			`};`

ICU-353 basic warnings cleanup (not touching object's memory layout). A basic build now compiles w/o a single warning on Linux. One with --enable-strict is a different matter... X-SVN-Rev: 1124 2000-04-13 23:00:43 +00:00			`const UConverterSharedData _Latin1Data={`
ICU-98 These are changes to support 64-bit platforms, such as Sun SPARC V9, which is the only one tested so far. X-SVN-Rev: 1022 2000-03-29 21:41:11 +00:00			`sizeof(UConverterSharedData), ~((uint32_t) 0),`
ICU-267 make CNV file format portable. (+ makeconv cleanup) X-SVN-Rev: 1176 2000-04-19 23:05:27 +00:00			`NULL, NULL, &_Latin1StaticData, FALSE, &_Latin1Impl,`
			`0`
ICU-266 c++-ify UConverter (UConverterImpl resembles a vtable) X-SVN-Rev: 723 2000-02-07 17:35:21 +00:00			`};`
ICU-705 add algorithmic US-ASCII converter X-SVN-Rev: 3286 2000-12-20 02:08:39 +00:00
			`/* US-ASCII ----------------------------------------------------------------- */`

			`/* This is a table-less version of _MBCSSingleToBMPWithOffsets(). */`
			`U_CFUNC void`
			`_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,`
			`UErrorCode *pErrorCode) {`
			`const uint8_t source, sourceLimit, *lastSource;`
			`UChar *target;`
			`int32_t targetCapacity, length;`
			`int32_t *offsets;`

			`int32_t sourceIndex;`
			`uint8_t b;`

			`/* set up the local pointers */`
			`source=(const uint8_t *)pArgs->source;`
			`sourceLimit=(const uint8_t *)pArgs->sourceLimit;`
			`target=pArgs->target;`
			`targetCapacity=pArgs->targetLimit-pArgs->target;`
			`offsets=pArgs->offsets;`

			`/* sourceIndex=-1 if the current character began in the previous buffer */`
			`sourceIndex=0;`
			`lastSource=source;`

			`/*`
			`* since the conversion here is 1:1 UChar:uint8_t, we need only one counter`
			`* for the minimum of the sourceLength and targetCapacity`
			`*/`
			`length=sourceLimit-source;`
			`if(length<targetCapacity) {`
			`targetCapacity=length;`
			`}`

			`/* conversion loop */`
			`while(targetCapacity>0) {`
			`b=*source++;`
			`if(b<=0x7f) {`
			`*target++=b;`
			`--targetCapacity;`
			`} else {`
			`/* call the callback function with all the preparations and post-processing */`
			`UConverter *cnv=pArgs->converter;`

			`/* callback(illegal) */`
			`*pErrorCode=U_ILLEGAL_CHAR_FOUND;`

			`/* set offsets since the start or the last callback */`
			`if(offsets!=NULL) {`
			`int32_t count=(int32_t)(source-lastSource);`

			`/* predecrement: do not set the offset for the callback-causing character */`
			`while(--count>0) {`
			`*offsets++=sourceIndex++;`
			`}`
			`/* offset and sourceIndex are now set for the current character */`
			`}`

			`/* update the arguments structure */`
			`pArgs->source=(const char *)source;`
			`pArgs->target=target;`
			`pArgs->offsets=offsets;`

			`/* copy the current bytes to invalidCharBuffer */`
			`cnv->invalidCharBuffer[0]=b;`
			`cnv->invalidCharLength=1;`

			`/* call the callback function */`
			`cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, 1, UCNV_ILLEGAL, pErrorCode);`

			`/* update target and deal with offsets if necessary */`
			`offsets=ucnv_updateCallbackOffsets(offsets, pArgs->target-target, sourceIndex);`
			`target=pArgs->target;`

			`/* update the source pointer and index */`
			`sourceIndex+=1+((const uint8_t *)pArgs->source-source);`
			`source=lastSource=(const uint8_t *)pArgs->source;`
			`targetCapacity=pArgs->targetLimit-target;`
			`length=sourceLimit-source;`
			`if(length<targetCapacity) {`
			`targetCapacity=length;`
			`}`

			`/*`
			`* If the callback overflowed the target, then we need to`
			`* stop here with an overflow indication.`
			`*/`
			`if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {`
			`break;`
			`} else if(U_FAILURE(*pErrorCode)) {`
			`/* break on error */`
			`break;`
			`} else if(cnv->UCharErrorBufferLength>0) {`
			`/* target is full */`
			`*pErrorCode=U_BUFFER_OVERFLOW_ERROR;`
			`break;`
			`}`
			`}`
			`}`

			`if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=pArgs->targetLimit) {`
			`/* target is full */`
			`*pErrorCode=U_BUFFER_OVERFLOW_ERROR;`
			`}`

			`/* set offsets since the start or the last callback */`
			`if(offsets!=NULL) {`
			`size_t count=source-lastSource;`
			`while(count>0) {`
			`*offsets++=sourceIndex++;`
			`--count;`
			`}`
			`}`

			`/* write back the updated pointers */`
			`pArgs->source=(const char *)source;`
			`pArgs->target=target;`
			`pArgs->offsets=offsets;`
			`}`

			`/* This is a table-less version of _MBCSSingleGetNextUChar(). */`
			`U_CFUNC UChar32`
			`_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,`
			`UErrorCode *pErrorCode) {`
			`UChar buffer[UTF_MAX_CHAR_LENGTH];`
			`const uint8_t *source;`
			`uint8_t b;`

			`/* set up the local pointers */`
			`source=(const uint8_t *)pArgs->source;`

			`/* conversion loop */`
			`while(source<(const uint8_t *)pArgs->sourceLimit) {`
			`b=*source++;`
			`pArgs->source=(const char *)source;`
			`if(b<=0x7f) {`
			`return b;`
			`} else {`
			`/* call the callback function with all the preparations and post-processing */`
			`UConverter *cnv=pArgs->converter;`

			`/* callback(illegal) */`
			`*pErrorCode=U_ILLEGAL_CHAR_FOUND;`

			`/* update the arguments structure */`
			`pArgs->target=buffer;`
			`pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;`

			`/* copy the current byte to invalidCharBuffer */`
			`cnv->invalidCharBuffer[0]=(char)b;`
			`cnv->invalidCharLength=1;`

			`/* call the callback function */`
			`cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, 1, UCNV_ILLEGAL, pErrorCode);`

			`/* update the source pointer */`
			`source=(const uint8_t *)pArgs->source;`

			`/*`
			`* return the first character if the callback wrote some`
			`* we do not need to goto finish because the converter state is already set`
			`*/`
			`if(U_SUCCESS(*pErrorCode)) {`
			`int32_t length=pArgs->target-buffer;`
			`if(length>0) {`
			`return ucnv_getUChar32KeepOverflow(cnv, buffer, length);`
			`}`
			`/* else (callback did not write anything) continue */`
			`} else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {`
			`*pErrorCode=U_ZERO_ERROR;`
			`return ucnv_getUChar32KeepOverflow(cnv, buffer, UTF_MAX_CHAR_LENGTH);`
			`} else {`
			`/* break on error */`
			`/* ### what if a callback set an error but _also_ generated output?! */`
			`return 0xffff;`
			`}`
			`}`
			`}`

			`/* no output because of empty input or only skipping callbacks */`
			`*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;`
			`return 0xffff;`
			`}`

			`static const UConverterImpl _ASCIIImpl={`
			`UCNV_US_ASCII,`

			`NULL,`
			`NULL,`

			`NULL,`
			`NULL,`
			`NULL,`

			`_ASCIIToUnicodeWithOffsets,`
			`_ASCIIToUnicodeWithOffsets,`
			`_Latin1FromUnicodeWithOffsets,`
			`_Latin1FromUnicodeWithOffsets,`
			`_ASCIIGetNextUChar,`

			`NULL,`
			`NULL`
			`};`

			`static const UConverterStaticData _ASCIIStaticData={`
			`sizeof(UConverterStaticData),`
			`"US-ASCII",`
			`367, UCNV_IBM, UCNV_US_ASCII, 1, 1,`
			`{ 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,`
			`0,`
			`{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */`
			`};`

			`const UConverterSharedData _ASCIIData={`
			`sizeof(UConverterSharedData), ~((uint32_t) 0),`
			`NULL, NULL, &_ASCIIStaticData, FALSE, &_ASCIIImpl,`
			`0`
			`};`