scuffed-code/icu4c/source/i18n/csdetect.cpp

/*
 **********************************************************************
 *   Copyright (C) 2005-2012, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */

#include "unicode/utypes.h"

#if !UCONFIG_NO_CONVERSION

#include "unicode/ucsdet.h"

#include "csdetect.h"
#include "csmatch.h"
#include "uenumimp.h"

#include "cmemory.h"
#include "cstring.h"
#include "umutex.h"
#include "ucln_in.h"
#include "uarrsort.h"
#include "inputext.h"
#include "csrsbcs.h"
#include "csrmbcs.h"
#include "csrutf8.h"
#include "csrucode.h"
#include "csr2022.h"

/* Number of elements in a true array (not a pointer): total size divided by the size of one element. */
#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])

/* Allocates uninitialized storage for `count` objects of `type` via uprv_malloc();
 * no constructors are run. The result must be checked for NULL by the caller. */
#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
/* Releases storage obtained from NEW_ARRAY via uprv_free(); no destructors are run. */
#define DELETE_ARRAY(array) uprv_free((void *) (array))

U_CDECL_BEGIN
/* Process-wide table of recognizer objects. It is published once by
 * CharsetDetector::setRecognizers() while holding the global mutex and is
 * released again by csdet_cleanup(). NULL means "not initialized yet". */
static icu::CharsetRecognizer **fCSRecognizers = NULL;

/* Number of entries in fCSRecognizers; 0 while the table is not initialized. */
static int32_t fCSRecognizers_size = 0;

/**
 * Library cleanup hook for the charset detector.
 *
 * Destroys every recognizer held in the shared table, frees the table itself
 * and resets the bookkeeping globals so that a later initialization starts
 * from scratch. Does nothing when the table was never created.
 *
 * @return always TRUE.
 */
static UBool U_CALLCONV csdet_cleanup(void)
{
    if (fCSRecognizers == NULL) {
        // Nothing was ever registered; leave the globals as they are.
        return TRUE;
    }

    int32_t slot = 0;

    while (slot < fCSRecognizers_size) {
        delete fCSRecognizers[slot];
        fCSRecognizers[slot] = NULL;
        slot += 1;
    }

    DELETE_ARRAY(fCSRecognizers);
    fCSRecognizers_size = 0;
    fCSRecognizers = NULL;

    return TRUE;
}

/**
 * Comparison callback used when sorting an array of CharsetMatch pointers.
 *
 * @param left   address of one array element (a CharsetMatch pointer).
 * @param right  address of another array element (a CharsetMatch pointer).
 * @return a positive value when the right-hand match has the higher
 *         confidence, a negative value when the left-hand one does, and 0
 *         when they are equal - i.e. the ordering is descending by confidence.
 */
static int32_t U_CALLCONV
charsetMatchComparator(const void * /*context*/, const void *left, const void *right)
{
    U_NAMESPACE_USE

    // Each argument points at an array slot, so dereference once to reach the match.
    const CharsetMatch *lhs = *(const CharsetMatch * const *) left;
    const CharsetMatch *rhs = *(const CharsetMatch * const *) right;

    // Operands are deliberately swapped: higher confidence must sort first.
    const int32_t rightConfidence = rhs->getConfidence();
    const int32_t leftConfidence  = lhs->getConfidence();

    return rightConfidence - leftConfidence;
}

U_CDECL_END

U_NAMESPACE_BEGIN

/**
 * Lazily builds the process-wide recognizer table (fCSRecognizers).
 *
 * The table is constructed outside the mutex and then published under the
 * global mutex only if no other thread has published one in the meantime.
 * Whenever the freshly built table is not the one that ends up installed
 * (allocation failure, or another thread won the race), every recognizer
 * created here and the pointer array are released again.
 *
 * @param status  in/out ICU error code. Nothing is done if it already
 *                indicates failure; set to U_MEMORY_ALLOCATION_ERROR when the
 *                pointer array or any recognizer could not be allocated.
 */
void CharsetDetector::setRecognizers(UErrorCode &status)
{
    UBool needsInit;

    if (U_FAILURE(status)) {
        return;
    }

    UMTX_CHECK(NULL, (UBool) (fCSRecognizers == NULL), needsInit);

    if (!needsInit) {
        return;
    }

    CharsetRecognizer *tempArray[] = {
        new CharsetRecog_UTF8(),

        new CharsetRecog_UTF_16_BE(),
        new CharsetRecog_UTF_16_LE(),
        new CharsetRecog_UTF_32_BE(),
        new CharsetRecog_UTF_32_LE(),

        new CharsetRecog_8859_1(),
        new CharsetRecog_8859_2(),
        new CharsetRecog_8859_5_ru(),
        new CharsetRecog_8859_6_ar(),
        new CharsetRecog_8859_7_el(),
        new CharsetRecog_8859_8_I_he(),
        new CharsetRecog_8859_8_he(),
        new CharsetRecog_windows_1251(),
        new CharsetRecog_windows_1256(),
        new CharsetRecog_KOI8_R(),
        new CharsetRecog_8859_9_tr(),
        new CharsetRecog_sjis(),
        new CharsetRecog_gb_18030(),
        new CharsetRecog_euc_jp(),
        new CharsetRecog_euc_kr(),
        new CharsetRecog_big5(),

        new CharsetRecog_2022JP(),
        new CharsetRecog_2022KR(),
        new CharsetRecog_2022CN(),

        new CharsetRecog_IBM424_he_rtl(),
        new CharsetRecog_IBM424_he_ltr(),
        new CharsetRecog_IBM420_ar_rtl(),
        new CharsetRecog_IBM420_ar_ltr()
    };
    int32_t rCount = ARRAY_SIZE(tempArray);
    int32_t r;

    // TRUE only once this call's table has been published as fCSRecognizers.
    // Tracked locally so the ownership decision below never has to re-read
    // the shared global outside the mutex.
    UBool installed = FALSE;

    CharsetRecognizer **recognizers = NEW_ARRAY(CharsetRecognizer *, rCount);

    if (recognizers == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
    } else {
        // Copy every slot (no early exit) so the array never contains
        // uninitialized pointers, and flag any recognizer that failed to allocate.
        for (r = 0; r < rCount; r += 1) {
            recognizers[r] = tempArray[r];

            if (recognizers[r] == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
            }
        }
    }

    if (U_SUCCESS(status)) {
        umtx_lock(NULL);
        if (fCSRecognizers == NULL) {
            fCSRecognizers_size = rCount;
            fCSRecognizers = recognizers;
            installed = TRUE;
        }
        umtx_unlock(NULL);
    }

    if (!installed) {
        // The objects are still owned by this call: release them through
        // tempArray, which is fully populated even when the pointer array
        // itself could not be allocated. Deleting a NULL entry is a no-op.
        for (r = 0; r < rCount; r += 1) {
            delete tempArray[r];
            tempArray[r] = NULL;
        }

        if (recognizers != NULL) {
            DELETE_ARRAY(recognizers);
        }
    }

    recognizers = NULL;
    ucln_i18n_registerCleanup(UCLN_I18N_CSDET, csdet_cleanup);
}

/**
 * Creates a detector with no input text, tag stripping disabled and an
 * empty result set.
 *
 * Makes sure the shared recognizer table exists and allocates one
 * CharsetMatch per recognizer to hold detection results.
 *
 * @param status  in/out ICU error code. Set to U_MEMORY_ALLOCATION_ERROR if
 *                the input-text holder, the result array or any result
 *                object cannot be allocated; failures reported by the
 *                InputText constructor or by setRecognizers() are passed
 *                through unchanged.
 */
CharsetDetector::CharsetDetector(UErrorCode &status)
  : textIn(new InputText(status)), resultArray(NULL),
    resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE)
{
    if (U_FAILURE(status)) {
        return;
    }

    // The InputText constructor cannot report its own allocation failure
    // through status, so a NULL result has to be caught here before any
    // member function dereferences textIn.
    if (textIn == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    setRecognizers(status);

    if (U_FAILURE(status)) {
        return;
    }

    resultArray = (CharsetMatch **)uprv_malloc(sizeof(CharsetMatch *)*fCSRecognizers_size);

    if (resultArray == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // Start from a fully NULL array so that a partially filled array is
    // always safe to walk and delete if an allocation below fails.
    for(int32_t i = 0; i < fCSRecognizers_size; i += 1) {
        resultArray[i] = NULL;
    }

    for(int32_t i = 0; i < fCSRecognizers_size; i += 1) {
        resultArray[i] = new CharsetMatch();

        if (resultArray[i] == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            break;
        }
    }
}

/**
 * Releases the input-text holder and every result object owned by this
 * detector, then the result array itself.
 */
CharsetDetector::~CharsetDetector()
{
    delete textIn;

    // resultArray stays NULL when construction failed before the array was
    // allocated; indexing it in that state would dereference a null pointer.
    if (resultArray != NULL) {
        for(int32_t i = 0; i < fCSRecognizers_size; i += 1) {
            delete resultArray[i];
        }

        uprv_free(resultArray);
    }
}

/**
 * Hands a new input buffer to the detector.
 *
 * @param in   the bytes to analyse; forwarded unchanged to the InputText holder.
 * @param len  length argument forwarded together with `in`.
 */
void CharsetDetector::setText(const char *in, int32_t len)
{
    // Mark cached results as stale so the next detectAll() re-runs detection.
    fFreshTextSet = TRUE;
    textIn->setText(in, len);
}

/**
 * Changes the tag-stripping setting that is passed to the input munging
 * step during detection.
 *
 * @param flag  the new setting.
 * @return the setting that was in effect before this call.
 */
UBool CharsetDetector::setStripTagsFlag(UBool flag)
{
    const UBool previous = fStripTags;

    // Any change here invalidates cached results, so force a fresh detection pass.
    fFreshTextSet = TRUE;
    fStripTags = flag;

    return previous;
}

/**
 * @return the current tag-stripping setting.
 */
UBool CharsetDetector::getStripTagsFlag() const
{
    const UBool current = fStripTags;

    return current;
}

void CharsetDetector::setDeclaredEncoding(const char *encoding, int32_t len) const
{
    textIn->setDeclaredEncoding(encoding,len);
}

/**
 * Ensures the shared recognizer table has been set up and reports how many
 * recognizers it holds.
 *
 * Any error raised while setting up the table is discarded; the current
 * table size is returned regardless.
 *
 * @return the number of registered recognizers.
 */
int32_t CharsetDetector::getDetectableCount()
{
    UErrorCode initStatus = U_ZERO_ERROR;

    setRecognizers(initStatus);

    const int32_t count = fCSRecognizers_size;

    return count;
}

/**
 * Runs detection and returns the single best match.
 *
 * @param status  in/out ICU error code, passed through to detectAll().
 * @return the first entry of the sorted result array, or NULL when
 *         detectAll() reported no matches.
 */
const CharsetMatch *CharsetDetector::detect(UErrorCode &status)
{
    int32_t matchCount = 0;

    detectAll(matchCount, status);

    if (matchCount <= 0) {
        return NULL;
    }

    return resultArray[0];
}

/**
 * Runs every registered recognizer over the current input and returns all
 * matches, sorted by the comparator into descending confidence order.
 *
 * Detection is only re-run when the text or the strip-tags flag changed
 * since the previous call; otherwise the cached results are returned.
 *
 * @param maxMatchesFound  receives the number of valid entries at the start
 *                         of the returned array. Left untouched when no
 *                         input text has been set.
 * @param status           in/out ICU error code. Set to
 *                         U_MISSING_RESOURCE_ERROR when no input text has
 *                         been set; may also be set by the sort.
 * @return the detector-owned result array, or NULL when no input text has
 *         been set.
 */
const CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound, UErrorCode &status)
{
    if (!textIn->isSet()) {
        status = U_MISSING_RESOURCE_ERROR;// TODO:  Need to set proper status code for input text not set
        return NULL;
    }

    if (fFreshTextSet) {
        textIn->MungeInput(fStripTags);

        // Ask each recognizer in turn; a successful match fills the next free
        // result slot, which is then claimed by advancing resultCount.
        resultCount = 0;

        for (int32_t idx = 0; idx < fCSRecognizers_size; ++idx) {
            CharsetRecognizer *recognizer = fCSRecognizers[idx];
            CharsetMatch *slot = resultArray[resultCount];

            if (recognizer->match(textIn, slot)) {
                resultCount += 1;
            }
        }

        // Zero or one result is trivially sorted already.
        if (resultCount > 1) {
            uprv_sortArray(resultArray, resultCount, sizeof resultArray[0], charsetMatchComparator, NULL, TRUE, &status);
        }

        fFreshTextSet = FALSE;
    }

    maxMatchesFound = resultCount;

    return resultArray;
}

/*const char *CharsetDetector::getCharsetName(int32_t index, UErrorCode &status) const
{
    if( index > fCSRecognizers_size-1 || index < 0) {
        status = U_INDEX_OUTOFBOUNDS_ERROR;

        return 0;
    } else {
        return fCSRecognizers[index]->getName();
    }
}*/

U_NAMESPACE_END

U_CDECL_BEGIN
/* Per-enumeration cursor stored in UEnumeration::context. */
typedef struct {
    /* Index into fCSRecognizers of the next name enumNext() will return. */
    int32_t currIndex;
} Context;


/**
 * Close callback: frees the enumeration's cursor (if any) and then the
 * enumeration object itself.
 *
 * @param en  the enumeration to destroy.
 */
static void U_CALLCONV
enumClose(UEnumeration *en) {
    void *cursor = en->context;

    if(cursor != NULL) {
        DELETE_ARRAY(cursor);
    }

    DELETE_ARRAY(en);
}

/**
 * Count callback.
 *
 * @return the number of entries in the shared recognizer table; both
 *         arguments are ignored.
 */
static int32_t U_CALLCONV
enumCount(UEnumeration *, UErrorCode *) {
    const int32_t total = fCSRecognizers_size;

    return total;
}

/**
 * Next callback: returns the name of the recognizer at the cursor position
 * and advances the cursor.
 *
 * @param en            the enumeration; its context holds the cursor.
 * @param resultLength  optional; receives the length of the returned name,
 *                      or 0 when the enumeration is exhausted.
 * @return the recognizer's name, or NULL once all recognizers have been
 *         returned. The error code argument is never touched.
 */
static const char* U_CALLCONV
enumNext(UEnumeration *en, int32_t *resultLength, UErrorCode * /*status*/) {
    Context *cursor = (Context *)en->context;
    const UBool wantsLength = (UBool)(resultLength != NULL);

    if(cursor->currIndex >= fCSRecognizers_size) {
        // Running off the end is not an error; just report "no more items".
        if(wantsLength) {
            *resultLength = 0;
        }
        return NULL;
    }

    const char *name = fCSRecognizers[cursor->currIndex]->getName();

    if(wantsLength) {
        *resultLength = (int32_t)uprv_strlen(name);
    }

    cursor->currIndex++;

    return name;
}

/**
 * Reset callback: moves the cursor back to the first recognizer.
 *
 * @param en  the enumeration whose cursor is rewound.
 */
static void U_CALLCONV
enumReset(UEnumeration *en, UErrorCode *) {
    Context *cursor = (Context *)en->context;

    cursor->currIndex = 0;
}

/*
 * Template for charset-name enumerations. ucsdet_getAllDetectableCharsets()
 * copies this structure into each enumeration it creates and then installs a
 * per-instance Context in the `context` member.
 * NOTE(review): the two leading NULL entries are presumably the
 * baseContext/context data pointers declared in uenumimp.h - confirm the
 * member order there before reordering anything in this initializer.
 */
static const UEnumeration gCSDetEnumeration = {
    NULL,
    NULL,
    enumClose,
    enumCount,
    uenum_unextDefault,
    enumNext,
    enumReset
};

/**
 * Creates an enumeration over the names of all registered charset
 * recognizers.
 *
 * @param status  in/out ICU error code. Nothing is done if it already
 *                indicates failure; set to U_MEMORY_ALLOCATION_ERROR when the
 *                enumeration or its cursor cannot be allocated.
 * @return a new enumeration positioned at the first name, to be released
 *         through its close callback, or 0 on failure. The detector argument
 *         is unused.
 */
U_CAPI  UEnumeration * U_EXPORT2
ucsdet_getAllDetectableCharsets(const UCharsetDetector * /*ucsd*/, UErrorCode *status)
{
    U_NAMESPACE_USE

    if(U_FAILURE(*status)) {
        return 0;
    }

    /* Initialize recognized charsets. */
    CharsetDetector::getDetectableCount();

    UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
    Context *cursor = NEW_ARRAY(Context, 1);

    // Both allocations must succeed before either block is written to;
    // release whichever one did succeed and report the failure.
    if(en == NULL || cursor == NULL) {
        if(cursor != NULL) {
            DELETE_ARRAY(cursor);
        }
        if(en != NULL) {
            DELETE_ARRAY(en);
        }
        *status = U_MEMORY_ALLOCATION_ERROR;
        return 0;
    }

    memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
    uprv_memset(cursor, 0, sizeof(Context));
    en->context = (void*)cursor;
    return en;
}
U_CDECL_END

#endif
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`/*`
			`**********************************************************************`
ICU-6954 Fix charset detection bug resulting from having state in shared recognizer objects. X-SVN-Rev: 31900 2012-06-01 20:40:48 +00:00			`* Copyright (C) 2005-2012, International Business Machines`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`* Corporation and others. All Rights Reserved.`
			`**********************************************************************`
			`*/`

			`#include "unicode/utypes.h"`
ICU-5198 Disable charset detection when UCONFIG_NO_CONVERSION is 1. X-SVN-Rev: 19622 2006-05-09 18:06:10 +00:00
			`#if !UCONFIG_NO_CONVERSION`

ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00			`#include "unicode/ucsdet.h"`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00
			`#include "csdetect.h"`
			`#include "csmatch.h"`
ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00			`#include "uenumimp.h"`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00
			`#include "cmemory.h"`
ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00			`#include "cstring.h"`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`#include "umutex.h"`
			`#include "ucln_in.h"`
ICU-4639 code review comments. X-SVN-Rev: 20125 2006-08-21 23:35:23 +00:00			`#include "uarrsort.h"`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`#include "inputext.h"`
			`#include "csrsbcs.h"`
			`#include "csrmbcs.h"`
			`#include "csrutf8.h"`
			`#include "csrucode.h"`
			`#include "csr2022.h"`

			`#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])`

			`#define NEW_ARRAY(type,count) (type ) uprv_malloc((count) sizeof(type))`
			`#define DELETE_ARRAY(array) uprv_free((void *) (array))`

			`U_CDECL_BEGIN`
ICU-8680 require C++ namespace, replace most U_NAMESPACE_QUALIFIER with icu::, remove still-draft U_STD_NS, U_STD_NSQ, and U_STD_NS_USE X-SVN-Rev: 30281 2011-07-06 04:03:35 +00:00			`static icu::CharsetRecognizer **fCSRecognizers = NULL;`
ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00
			`static int32_t fCSRecognizers_size = 0;`

ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`static UBool U_CALLCONV csdet_cleanup(void)`
			`{`
ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00			`if (fCSRecognizers != NULL) {`
			`for(int32_t r = 0; r < fCSRecognizers_size; r += 1) {`
			`delete fCSRecognizers[r];`
			`fCSRecognizers[r] = NULL;`
			`}`

			`DELETE_ARRAY(fCSRecognizers);`
			`fCSRecognizers = NULL;`
			`fCSRecognizers_size = 0;`
			`}`

			`return TRUE;`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`}`
ICU-4639 Fix a compiler warning and a z/OS runtime error introduced from last check-in. X-SVN-Rev: 20129 2006-08-22 08:31:57 +00:00
			`static int32_t U_CALLCONV`
ICU-5445 Fix some compiler warnings X-SVN-Rev: 21283 2007-03-17 06:13:14 +00:00			`charsetMatchComparator(const void * /context/, const void left, const void right)`
ICU-4639 Fix a compiler warning and a z/OS runtime error introduced from last check-in. X-SVN-Rev: 20129 2006-08-22 08:31:57 +00:00			`{`
ICU-5304 Allow source code to work again without using U_NAMESPACE_USE X-SVN-Rev: 20242 2006-09-04 16:36:21 +00:00			`U_NAMESPACE_USE`

ICU-4639 Fix a compiler warning and a z/OS runtime error introduced from last check-in. X-SVN-Rev: 20129 2006-08-22 08:31:57 +00:00			`const CharsetMatch csm_l = (const CharsetMatch ) left;`
			`const CharsetMatch csm_r = (const CharsetMatch ) right;`

			`// NOTE: compare is backwards to sort from highest to lowest.`
			`return (csm_r)->getConfidence() - (csm_l)->getConfidence();`
			`}`

ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`U_CDECL_END`

ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00			`U_NAMESPACE_BEGIN`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00
ICU-4639 code review comments. X-SVN-Rev: 20125 2006-08-21 23:35:23 +00:00			`void CharsetDetector::setRecognizers(UErrorCode &status)`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`{`
			`UBool needsInit;`
			`CharsetRecognizer **recognizers;`

ICU-4639 code review comments. X-SVN-Rev: 20125 2006-08-21 23:35:23 +00:00			`if (U_FAILURE(status)) {`
			`return;`
			`}`

ICU-5349 Use UMTX_CHECK for double check locking. X-SVN-Rev: 20176 2006-08-29 04:57:05 +00:00			`UMTX_CHECK(NULL, (UBool) (fCSRecognizers == NULL), needsInit);`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00
			`if (needsInit) {`
			`CharsetRecognizer *tempArray[] = {`
			`new CharsetRecog_UTF8(),`

			`new CharsetRecog_UTF_16_BE(),`
			`new CharsetRecog_UTF_16_LE(),`
			`new CharsetRecog_UTF_32_BE(),`
			`new CharsetRecog_UTF_32_LE(),`

ICU-6954 Fix charset detection bug resulting from having state in shared recognizer objects. X-SVN-Rev: 31900 2012-06-01 20:40:48 +00:00			`new CharsetRecog_8859_1(),`
			`new CharsetRecog_8859_2(),`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`new CharsetRecog_8859_5_ru(),`
			`new CharsetRecog_8859_6_ar(),`
			`new CharsetRecog_8859_7_el(),`
			`new CharsetRecog_8859_8_I_he(),`
			`new CharsetRecog_8859_8_he(),`
			`new CharsetRecog_windows_1251(),`
			`new CharsetRecog_windows_1256(),`
			`new CharsetRecog_KOI8_R(),`
			`new CharsetRecog_8859_9_tr(),`
			`new CharsetRecog_sjis(),`
			`new CharsetRecog_gb_18030(),`
			`new CharsetRecog_euc_jp(),`
			`new CharsetRecog_euc_kr(),`
ICU-4639 Add Big5, use sorted mbcs statistics, increase coverage. X-SVN-Rev: 19152 2006-02-13 20:47:36 +00:00			`new CharsetRecog_big5(),`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00
			`new CharsetRecog_2022JP(),`
			`new CharsetRecog_2022KR(),`
ICU-6778 Port IBM420 and IBM424 CharsetDetector to ICU4C. X-SVN-Rev: 25696 2009-03-31 15:39:00 +00:00			`new CharsetRecog_2022CN(),`

ICU-6778 Update ICU4C IBM420 and IBM424 CharsetDetector with changes made to ICU4J. X-SVN-Rev: 25763 2009-04-13 21:32:21 +00:00			`new CharsetRecog_IBM424_he_rtl(),`
			`new CharsetRecog_IBM424_he_ltr(),`
			`new CharsetRecog_IBM420_ar_rtl(),`
			`new CharsetRecog_IBM420_ar_ltr()`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`};`
			`int32_t rCount = ARRAY_SIZE(tempArray);`
			`int32_t r;`

			`recognizers = NEW_ARRAY(CharsetRecognizer *, rCount);`
ICU-4639 code review comments. X-SVN-Rev: 20125 2006-08-21 23:35:23 +00:00
			`if (recognizers == NULL) {`
			`status = U_MEMORY_ALLOCATION_ERROR;`
ICU-6176 Fix some Coverity warnings X-SVN-Rev: 23414 2008-02-13 09:35:50 +00:00			`return;`
ICU-4639 code review comments. X-SVN-Rev: 20125 2006-08-21 23:35:23 +00:00			`} else {`
			`for (r = 0; r < rCount; r += 1) {`
			`recognizers[r] = tempArray[r];`

			`if (recognizers[r] == NULL) {`
			`status = U_MEMORY_ALLOCATION_ERROR;`
			`break;`
			`}`
			`}`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`}`

ICU-4639 code review comments. X-SVN-Rev: 20125 2006-08-21 23:35:23 +00:00			`if (U_SUCCESS(status)) {`
			`umtx_lock(NULL);`
			`if (fCSRecognizers == NULL) {`
			`fCSRecognizers_size = rCount;`
ICU-6480 clean up UMTX_CHECK usage in lazy init X-SVN-Rev: 24439 2008-08-05 00:09:13 +00:00			`fCSRecognizers = recognizers;`
ICU-4639 code review comments. X-SVN-Rev: 20125 2006-08-21 23:35:23 +00:00			`}`
			`umtx_unlock(NULL);`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`}`

			`if (fCSRecognizers != recognizers) {`
			`for (r = 0; r < rCount; r += 1) {`
			`delete recognizers[r];`
			`recognizers[r] = NULL;`
			`}`

			`DELETE_ARRAY(recognizers);`
			`}`

			`recognizers = NULL;`
			`ucln_i18n_registerCleanup(UCLN_I18N_CSDET, csdet_cleanup);`
			`}`
			`}`

ICU-4639 code review comments. X-SVN-Rev: 20125 2006-08-21 23:35:23 +00:00			`CharsetDetector::CharsetDetector(UErrorCode &status)`
ICU-6132 Recover from allocation errors more gracefully. X-SVN-Rev: 23399 2008-02-08 09:10:22 +00:00			`: textIn(new InputText(status)), resultArray(NULL),`
			`resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE)`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`{`
ICU-4639 code review comments. X-SVN-Rev: 20125 2006-08-21 23:35:23 +00:00			`if (U_FAILURE(status)) {`
			`return;`
			`}`

			`setRecognizers(status);`

			`if (U_FAILURE(status)) {`
			`return;`
			`}`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00
ICU-5320 Don't use global new and delete X-SVN-Rev: 20063 2006-08-15 06:45:05 +00:00			`resultArray = (CharsetMatch *)uprv_malloc(sizeof(CharsetMatch )*fCSRecognizers_size);`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00
ICU-4639 code review comments. X-SVN-Rev: 20125 2006-08-21 23:35:23 +00:00			`if (resultArray == NULL) {`
			`status = U_MEMORY_ALLOCATION_ERROR;`
			`return;`
			`}`

ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`for(int32_t i = 0; i < fCSRecognizers_size; i += 1) {`
ICU-4639 Fix compile / link errors having to do w/ C api functions. X-SVN-Rev: 19071 2006-02-06 20:45:30 +00:00			`resultArray[i] = new CharsetMatch();`
ICU-4639 code review comments. X-SVN-Rev: 20125 2006-08-21 23:35:23 +00:00
			`if (resultArray[i] == NULL) {`
			`status = U_MEMORY_ALLOCATION_ERROR;`
			`break;`
			`}`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`}`
			`}`

ICU-4639 Fix compile / link errors having to do w/ C api functions. X-SVN-Rev: 19071 2006-02-06 20:45:30 +00:00			`CharsetDetector::~CharsetDetector()`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`{`
			`delete textIn;`

			`for(int32_t i = 0; i < fCSRecognizers_size; i += 1) {`
			`delete resultArray[i];`
			`}`

ICU-5320 Don't use global new and delete X-SVN-Rev: 20063 2006-08-15 06:45:05 +00:00			`uprv_free(resultArray);`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`}`

ICU-4639 Fix compile / link errors having to do w/ C api functions. X-SVN-Rev: 19071 2006-02-06 20:45:30 +00:00			`void CharsetDetector::setText(const char *in, int32_t len)`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`{`
			`textIn->setText(in, len);`
			`fFreshTextSet = TRUE;`
			`}`

ICU-4639 Fix compile / link errors having to do w/ C api functions. X-SVN-Rev: 19071 2006-02-06 20:45:30 +00:00			`UBool CharsetDetector::setStripTagsFlag(UBool flag)`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`{`
			`UBool temp = fStripTags;`
			`fStripTags = flag;`
			`fFreshTextSet = TRUE;`
			`return temp;`
			`}`

ICU-4639 Fix compile / link errors having to do w/ C api functions. X-SVN-Rev: 19071 2006-02-06 20:45:30 +00:00			`UBool CharsetDetector::getStripTagsFlag() const`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`{`
			`return fStripTags;`
			`}`

ICU-4639 Fix compile / link errors having to do w/ C api functions. X-SVN-Rev: 19071 2006-02-06 20:45:30 +00:00			`void CharsetDetector::setDeclaredEncoding(const char *encoding, int32_t len) const`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`{`
			`textIn->setDeclaredEncoding(encoding,len);`
			`}`

ICU-4639 Fix compile / link errors having to do w/ C api functions. X-SVN-Rev: 19071 2006-02-06 20:45:30 +00:00			`int32_t CharsetDetector::getDetectableCount()`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`{`
ICU-4639 code review comments. X-SVN-Rev: 20125 2006-08-21 23:35:23 +00:00			`UErrorCode status = U_ZERO_ERROR;`

			`setRecognizers(status);`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00
			`return fCSRecognizers_size;`
			`}`

ICU-4639 Fix compile / link errors having to do w/ C api functions. X-SVN-Rev: 19071 2006-02-06 20:45:30 +00:00			`const CharsetMatch *CharsetDetector::detect(UErrorCode &status)`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`{`
			`int32_t maxMatchesFound = 0;`

			`detectAll(maxMatchesFound, status);`

			`if(maxMatchesFound > 0) {`
			`return resultArray[0];`
			`} else {`
ICU-4639 Cleanup, more error checking. X-SVN-Rev: 19086 2006-02-07 21:59:16 +00:00			`return NULL;`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`}`
			`}`

ICU-4639 Fix compile / link errors having to do w/ C api functions. X-SVN-Rev: 19071 2006-02-06 20:45:30 +00:00			`const CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound, UErrorCode &status)`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`{`
			`if(!textIn->isSet()) {`
			`status = U_MISSING_RESOURCE_ERROR;// TODO: Need to set proper status code for input text not set`

ICU-4639 resultCount needs to be instance data... X-SVN-Rev: 19084 2006-02-07 20:24:16 +00:00			`return NULL;`
ICU-6954 Fix charset detection bug resulting from having state in shared recognizer objects. X-SVN-Rev: 31900 2012-06-01 20:40:48 +00:00			`} else if (fFreshTextSet) {`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`CharsetRecognizer *csr;`
ICU-5371 Visual Studio 6 fixes. X-SVN-Rev: 20277 2006-09-08 16:48:31 +00:00			`int32_t i;`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00
			`textIn->MungeInput(fStripTags);`

			`// Iterate over all possible charsets, remember all that`
			`// give a match quality > 0.`
ICU-4639 resultCount needs to be instance data... X-SVN-Rev: 19084 2006-02-07 20:24:16 +00:00			`resultCount = 0;`
ICU-5371 Visual Studio 6 fixes. X-SVN-Rev: 20277 2006-09-08 16:48:31 +00:00			`for (i = 0; i < fCSRecognizers_size; i += 1) {`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`csr = fCSRecognizers[i];`
ICU-6954 Fix charset detection bug resulting from having state in shared recognizer objects. X-SVN-Rev: 31900 2012-06-01 20:40:48 +00:00			`if (csr->match(textIn, resultArray[resultCount])) {`
			`resultCount++;`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`}`
			`}`

ICU-6954 Fix charset detection bug resulting from having state in shared recognizer objects. X-SVN-Rev: 31900 2012-06-01 20:40:48 +00:00			`if (resultCount > 1) {`
			`uprv_sortArray(resultArray, resultCount, sizeof resultArray[0], charsetMatchComparator, NULL, TRUE, &status);`
ICU-6394 charset detection returns duplicate charsets X-SVN-Rev: 25909 2009-04-24 22:24:27 +00:00			`}`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`fFreshTextSet = FALSE;`
			`}`

			`maxMatchesFound = resultCount;`

			`return resultArray;`
			`}`

ICU-5410 Comment out unused code. X-SVN-Rev: 20673 2006-11-16 20:32:23 +00:00			`/const char CharsetDetector::getCharsetName(int32_t index, UErrorCode &status) const`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00			`{`
			`if( index > fCSRecognizers_size-1 \|\| index < 0) {`
			`status = U_INDEX_OUTOFBOUNDS_ERROR;`

			`return 0;`
			`} else {`
			`return fCSRecognizers[index]->getName();`
			`}`
ICU-5410 Comment out unused code. X-SVN-Rev: 20673 2006-11-16 20:32:23 +00:00			`}*/`
ICU-4639 Initial checkin of C port of CharsetDetection. X-SVN-Rev: 19069 2006-02-06 18:03:11 +00:00
			`U_NAMESPACE_END`

ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00			`U_CDECL_BEGIN`
			`typedef struct {`
			`int32_t currIndex;`
			`} Context;`



			`static void U_CALLCONV`
			`enumClose(UEnumeration *en) {`
			`if(en->context != NULL) {`
			`DELETE_ARRAY(en->context);`
			`}`

			`DELETE_ARRAY(en);`
			`}`

			`static int32_t U_CALLCONV`
ICU-4639 Make sure that the next enumeration function fails as expected. X-SVN-Rev: 19407 2006-03-22 09:48:15 +00:00			`enumCount(UEnumeration , UErrorCode ) {`
ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00			`return fCSRecognizers_size;`
			`}`

			`static const char* U_CALLCONV`
ICU-5445 Fix some compiler warnings X-SVN-Rev: 21283 2007-03-17 06:13:14 +00:00			`enumNext(UEnumeration en, int32_t resultLength, UErrorCode * /status/) {`
ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00			`if(((Context *)en->context)->currIndex >= fCSRecognizers_size) {`
ICU-5220 uenum_next() must not set an error code for just enumerating past the end X-SVN-Rev: 19989 2006-08-06 22:38:31 +00:00			`if(resultLength != NULL) {`
			`*resultLength = 0;`
			`}`
ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00			`return NULL;`
			`}`
			`const char currName = fCSRecognizers[((Context )en->context)->currIndex]->getName();`
ICU-5220 uenum_next() must not set an error code for just enumerating past the end X-SVN-Rev: 19989 2006-08-06 22:38:31 +00:00			`if(resultLength != NULL) {`
			`*resultLength = (int32_t)uprv_strlen(currName);`
			`}`
ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00			`((Context *)en->context)->currIndex++;`

			`return currName;`
			`}`

			`static void U_CALLCONV`
ICU-4639 Make sure that the next enumeration function fails as expected. X-SVN-Rev: 19407 2006-03-22 09:48:15 +00:00			`enumReset(UEnumeration en, UErrorCode ) {`
ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00			`((Context *)en->context)->currIndex = 0;`
			`}`

ICU-4639 Make unmodified global variables static const X-SVN-Rev: 19274 2006-02-24 19:57:04 +00:00			`static const UEnumeration gCSDetEnumeration = {`
ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00			`NULL,`
			`NULL,`
			`enumClose,`
			`enumCount,`
			`uenum_unextDefault,`
			`enumNext,`
			`enumReset`
			`};`

ICU-4639 Make sure that the next enumeration function fails as expected. X-SVN-Rev: 19407 2006-03-22 09:48:15 +00:00			`U_CAPI UEnumeration * U_EXPORT2`
ICU-6799 swat compiler warnings X-SVN-Rev: 25661 2009-03-27 00:37:55 +00:00			`ucsdet_getAllDetectableCharsets(const UCharsetDetector * /ucsd/, UErrorCode *status)`
ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00			`{`
ICU-5304 Allow source code to work again without using U_NAMESPACE_USE X-SVN-Rev: 20242 2006-09-04 16:36:21 +00:00			`U_NAMESPACE_USE`

ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00			`if(U_FAILURE(*status)) {`
			`return 0;`
			`}`

			`/* Initialize recognized charsets. */`
			`CharsetDetector::getDetectableCount();`

			`UEnumeration *en = NEW_ARRAY(UEnumeration, 1);`
ICU-4639 Make unmodified global variables static const X-SVN-Rev: 19274 2006-02-24 19:57:04 +00:00			`memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));`
ICU-4639 Improve the thread safety and simplify the UEnumeration X-SVN-Rev: 19078 2006-02-07 07:50:53 +00:00			`en->context = (void*)NEW_ARRAY(Context, 1);`
			`uprv_memset(en->context, 0, sizeof(Context));`
			`return en;`
			`}`
			`U_CDECL_END`
ICU-5198 Disable charset detection when UCONFIG_NO_CONVERSION is 1. X-SVN-Rev: 19622 2006-05-09 18:06:10 +00:00
			`#endif`
ICU-5445 Fix some compiler warnings X-SVN-Rev: 21283 2007-03-17 06:13:14 +00:00