scuffed-code/icu4c/source/test/cintltst/ucsdetst.c

/*
 ****************************************************************************
 * Copyright (c) 2005-2006, International Business Machines Corporation and *
 * others. All Rights Reserved.                                             *
 ****************************************************************************
 */

#include "unicode/utypes.h"

#include "unicode/ucsdet.h"
#include "unicode/ucnv.h"
#include "unicode/ustring.h"

#include "cintltst.h"

#include <stdlib.h>
#include <string.h>

/* Number of elements in a true array (not a pointer).  The argument is
 * parenthesized so that any array-valued expression can be passed safely. */
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))

/* Allocates room for `count` objects of `type` through ctst_malloc().  The
 * whole expansion is parenthesized so it can be used inside larger
 * expressions; the returned pointer is handed back unchecked. */
#define NEW_ARRAY(type,count) ((type *) ctst_malloc((count) * sizeof(type)))
/* Intentionally expands to nothing: this file never releases NEW_ARRAY
 * buffers itself (presumably the test harness reclaims ctst_malloc memory --
 * TODO confirm against cintltst). */
#define DELETE_ARRAY(array)

static void TestConstruction(void);
static void TestUTF8(void);
static void TestUTF16(void);
static void TestC1Bytes(void);
static void TestInputFilter(void);
static void TestChaining(void);

void addUCsdetTest(TestNode** root);

/*
 * Registers every charset-detector test case defined in this file with the
 * test tree rooted at `root`.  Registration order follows the table below.
 */
void addUCsdetTest(TestNode** root)
{
    /* Each test entry point paired with the path it is registered under. */
    static const struct {
        void (*run)(void);
        const char *path;
    } cases[] = {
        { &TestConstruction, "ucsdetst/TestConstruction" },
        { &TestUTF8,         "ucsdetst/TestUTF8" },
        { &TestUTF16,        "ucsdetst/TestUTF16" },
        { &TestC1Bytes,      "ucsdetst/TestC1Bytes" },
        { &TestInputFilter,  "ucsdetst/TestInputFilter" },
        { &TestChaining,     "ucsdetst/TestErrorChaining" }
    };
    const int32_t caseCount = (int32_t) (sizeof(cases) / sizeof(cases[0]));
    int32_t idx;

    for (idx = 0; idx < caseCount; idx += 1) {
        addTest(root, cases[idx].run, cases[idx].path);
    }
}

/*
 * Measures how many bytes converting src[0..length) with `cnv` produces.
 * The output itself is discarded: the text is converted chunk by chunk into
 * a scratch buffer, and the loop continues for as long as the converter
 * reports that the scratch buffer overflowed.
 *
 * @return the total number of bytes written across all chunks.
 */
static int32_t preflight(const UChar *src, int32_t length, UConverter *cnv)
{
    char scratch[1024];
    char *const scratchEnd = scratch + sizeof(scratch);
    const UChar *const srcEnd = src + length;
    int32_t total = 0;

    for (;;) {
        /* Every pass starts with a clean status and an empty scratch buffer. */
        UErrorCode ec = U_ZERO_ERROR;
        char *out = scratch;

        ucnv_fromUnicode(cnv, &out, scratchEnd, &src, srcEnd, 0, TRUE, &ec);
        total += (int32_t) (out - scratch);

        if (ec != U_BUFFER_OVERFLOW_ERROR) {
            break;
        }
    }

    return total;
}

/*
 * Expands the escape sequences in the NUL-terminated string `src` into a
 * newly allocated UChar buffer.
 *
 * @param src     text to run through u_unescape().
 * @param length  receives the number of UChars produced, not counting the
 *                terminator.
 * @return buffer obtained from NEW_ARRAY holding `*length` UChars followed by
 *         a terminating zero.
 */
static UChar *unescape(const char *src, int32_t *length)
{
    /* A first pass without a destination only measures the result. */
    int32_t charCount = u_unescape(src, NULL, 0);
    UChar *chars = NEW_ARRAY(UChar, charCount + 1);

    /* Offer the whole allocation, including the extra slot, so u_unescape()
     * has room for the zero terminator; passing only charCount left the last
     * slot uninitialized. */
    u_unescape(src, chars, charCount + 1);

    *length = charCount;
    return chars;
}

/*
 * Converts src[0..length) to the charset named by `codepage`.
 *
 * Failures to open the converter, to allocate the buffer, or to convert are
 * reported through log_err() instead of silently yielding empty test data.
 *
 * @param src         UTF-16 text to convert.
 * @param length      number of UChars in `src`.
 * @param codepage    converter name handed to ucnv_open().
 * @param byteLength  receives the number of bytes written to the result.
 * @return buffer obtained from NEW_ARRAY holding the converted bytes followed
 *         by a NUL byte, or NULL if the buffer could not be allocated.  Pass
 *         the result to freeBytes() when done.
 */
static char *extractBytes(const UChar *src, int32_t length, const char *codepage, int32_t *byteLength)
{
    UErrorCode status = U_ZERO_ERROR;
    UConverter *cnv = ucnv_open(codepage, &status);
    const UChar *srcLimit = src + length;
    int32_t byteCount = 0;
    char *bytes, *dest, *destLimit;

    if (U_FAILURE(status)) {
        log_err("extractBytes: could not open a converter for %s: %s\n", codepage, u_errorName(status));
    } else {
        byteCount = preflight(src, length, cnv);
    }

    /* One extra byte so the result can always be NUL-terminated. */
    bytes = NEW_ARRAY(char, byteCount + 1);

    if (bytes == NULL) {
        log_err("extractBytes: could not allocate %d bytes for %s\n", (int) (byteCount + 1), codepage);
        ucnv_close(cnv);
        *byteLength = 0;
        return NULL;
    }

    dest = bytes;
    destLimit = bytes + byteCount + 1;

    if (U_SUCCESS(status)) {
        ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &status);

        if (U_FAILURE(status)) {
            log_err("extractBytes: conversion to %s failed: %s\n", codepage, u_errorName(status));
        }
    }

    ucnv_close(cnv);

    /* dest now points just past the last byte written. */
    if (dest < destLimit) {
        *dest = 0;
    }

    *byteLength = (int32_t) (dest - bytes);
    return bytes;
}

/*
 * Release hook for buffers returned by extractBytes().  DELETE_ARRAY is
 * defined as an empty macro in this file, so the statement below expands to
 * nothing and no memory is actually freed here.
 */
static void freeBytes(char *bytes)
{
    DELETE_ARRAY(bytes);
}

/*
 * Opens a charset detector and walks the enumeration of all detectable
 * charset names, reporting any entry that is NULL or empty.  An ICU error
 * raised while opening, counting or iterating is reported as well.
 */
static void TestConstruction(void)
{
    UErrorCode status = U_ZERO_ERROR;
    UCharsetDetector *csd = ucsdet_open(&status);
    UEnumeration *e = ucsdet_getAllDetectableCharsets(csd, &status);
    int32_t count = uenum_count(e, &status);
    int32_t i;

    for(i = 0; i < count; i += 1) {
        /* Initialized so the check below never reads an indeterminate value. */
        int32_t length = 0;
        const char *name = uenum_next(e, &length, &status);

        if(name == NULL || length <= 0) {
            log_err("ucsdet_getAllDetectableCharsets() returned a null or empty name!\n");
        }
    }

    /* The loop is bounded by `count`, so a failure earlier on could leave it
     * with nothing to check; look at the status explicitly so that such a
     * failure cannot pass unnoticed. */
    if (U_FAILURE(status)) {
        log_err("Enumerating the detectable charsets failed: %s\n", u_errorName(status));
    }

    uenum_close(e);
    ucsdet_close(csd);
}

/*
 * Round-trip check for UTF-8: unescapes a sample string, converts it to
 * UTF-8 bytes, runs the detector over those bytes, and compares the UChars
 * recovered from the best match against the original text.
 */
static void TestUTF8(void)
{
    UErrorCode ec = U_ZERO_ERROR;
    const char *escapedText = "This is a string with some non-ascii characters that will "
               "be converted to UTF-8, then shoved through the detection process.  "
               "\\u0391\\u0392\\u0393\\u0394\\u0395"
               "Sure would be nice if our source could contain Unicode directly!";
    int32_t textLength = 0;
    int32_t encodedLength = 0;
    int32_t decodedLength = 0;
    UChar *text = unescape(escapedText, &textLength);
    char *encoded = extractBytes(text, textLength, "UTF-8", &encodedLength);
    UCharsetDetector *detector = ucsdet_open(&ec);
    UChar *decoded = NEW_ARRAY(UChar, textLength);
    const UCharsetMatch *best;

    ucsdet_setText(detector, encoded, encodedLength, &ec);
    best = ucsdet_detect(detector, &ec);

    if (best != NULL) {
        /* Decode through the detected match and compare with the source text. */
        decodedLength = ucsdet_getUChars(best, decoded, textLength, &ec);

        if (u_strCompare(decoded, decodedLength, text, textLength, FALSE) != 0) {
            log_err("Round-trip test failed!\n");
        }

        ucsdet_setDeclaredEncoding(detector, "UTF-8", 5, &ec); /* for coverage */
    } else {
        log_err("Detection failure for UTF-8: got no matches.\n");
    }

    DELETE_ARRAY(decoded);
    freeBytes(encoded);
    ucsdet_close(detector);
}

/*
 * Runs the detector over `bytes` and reports an error unless the best match
 * is named `encoding` with a confidence of 100.  `status` is shared with the
 * caller so that all detector calls of one test chain through it.
 */
static void expectUTF16Match(UCharsetDetector *csd, const char *bytes, int32_t length,
                             const char *encoding, UErrorCode *status)
{
    const UCharsetMatch *match;
    const char *name;
    int32_t conf;

    ucsdet_setText(csd, bytes, length, status);
    match = ucsdet_detect(csd, status);

    if (match == NULL) {
        log_err("Encoding detection failure for %s: got no matches.\n", encoding);
        return;
    }

    name = ucsdet_getName(match, status);
    conf = ucsdet_getConfidence(match, status);

    /* Guard against a NULL name before strcmp() and "%s", as
     * TestInputFilter does. */
    if (name == NULL || strcmp(name, encoding) != 0) {
        log_err("Encoding detection failure for %s: got %s\n", encoding, name != NULL ? name : "(null)");
    }

    if (conf != 100) {
        log_err("Did not get 100%% confidence for %s: got %d\n", encoding, (int) conf);
    }
}

/*
 * Converts a BOM-prefixed Arabic sample to UTF-16BE and UTF-16LE and checks
 * that each byte stream is detected as its own encoding with full
 * confidence.  The little-endian check runs even if the big-endian one fails.
 */
static void TestUTF16(void)
{
    UErrorCode status = U_ZERO_ERROR;
    /* Notice the BOM on the start of this string */
    UChar chars[] = {
        0xFEFF, 0x0623, 0x0648, 0x0631, 0x0648, 0x0628, 0x0627, 0x002C,
        0x0020, 0x0628, 0x0631, 0x0645, 0x062c, 0x064a, 0x0627, 0x062a,
        0x0020, 0x0627, 0x0644, 0x062d, 0x0627, 0x0633, 0x0648, 0x0628,
        0x0020, 0x002b, 0x0020, 0x0627, 0x0646, 0x062a, 0x0631, 0x0646,
        0x064a, 0x062a, 0x0000};
    int32_t beLength = 0, leLength = 0, cLength = ARRAY_SIZE(chars);
    char *beBytes = extractBytes(chars, cLength, "UTF-16BE", &beLength);
    char *leBytes = extractBytes(chars, cLength, "UTF-16LE", &leLength);
    UCharsetDetector *csd = ucsdet_open(&status);

    expectUTF16Match(csd, beBytes, beLength, "UTF-16BE", &status);
    expectUTF16Match(csd, leBytes, leLength, "UTF-16LE", &status);

    freeBytes(leBytes);
    freeBytes(beBytes);
    ucsdet_close(csd);
}

/*
 * Checks that English text containing C1-range bytes (curly quotes encoded
 * in windows-1252) is detected as windows-1252, while English text without
 * them is detected as ISO-8859-1.  The body is compiled only when legacy
 * conversion is available.
 */
static void TestC1Bytes(void)
{
#if !UCONFIG_NO_LEGACY_CONVERSION
    UErrorCode status = U_ZERO_ERROR;
    const char *ssISO = "This is a small sample of some English text. Just enough to be sure that it detects correctly.";
    const char *ssWindows = "This is another small sample of some English text. Just enough to be sure that it detects correctly. It also includes some \\u201CC1\\u201D bytes.";
    int32_t sISOLength = 0, sWindowsLength = 0;
    UChar *sISO = unescape(ssISO, &sISOLength);
    UChar *sWindows = unescape(ssWindows, &sWindowsLength);
    int32_t lISO = 0, lWindows = 0;
    char *bISO = extractBytes(sISO, sISOLength, "ISO-8859-1", &lISO);
    char *bWindows = extractBytes(sWindows, sWindowsLength, "windows-1252", &lWindows);
    UCharsetDetector *csd = ucsdet_open(&status);
    const UCharsetMatch *match;
    const char *name;

    /* First the sample that contains C1 bytes. */
    ucsdet_setText(csd, bWindows, lWindows, &status);
    match = ucsdet_detect(csd, &status);

    if (match == NULL) {
        log_err("English text with C1 bytes got no matches.\n");
        goto bail;
    }

    name  = ucsdet_getName(match, &status);

    /* Guard against a NULL name before strcmp() and "%s", as
     * TestInputFilter does. */
    if (name == NULL || strcmp(name, "windows-1252") != 0) {
        log_err("English text with C1 bytes does not detect as windows-1252, but as %s\n",
                name != NULL ? name : "(null)");
    }

    /* Then the plain sample without C1 bytes. */
    ucsdet_setText(csd, bISO, lISO, &status);
    match = ucsdet_detect(csd, &status);

    if (match == NULL) {
        log_err("English text without C1 bytes got no matches.\n");
        goto bail;
    }

    name  = ucsdet_getName(match, &status);

    if (name == NULL || strcmp(name, "ISO-8859-1") != 0) {
        log_err("English text without C1 bytes does not detect as ISO-8859-1, but as %s\n",
                name != NULL ? name : "(null)");
    }

bail:
    freeBytes(bWindows);
    freeBytes(bISO);

    ucsdet_close(csd);
#endif
}

/*
 * Exercises the detector's input filter on French text wrapped in
 * English-looking markup tags: with the filter enabled the language is
 * expected to be "fr", with it disabled "en"; the charset is expected to be
 * ISO-8859-1 in both cases.  The unfiltered pass runs even if the filtered
 * pass finds no match.
 */
static void TestInputFilter(void)
{
    UErrorCode status = U_ZERO_ERROR;
    const char *ss = "<a> <lot> <of> <English> <inside> <the> <markup> Un tr\\u00E8s petit peu de Fran\\u00E7ais. <to> <confuse> <the> <detector>";
    int32_t sLength = 0;
    UChar *s  = unescape(ss, &sLength);
    int32_t byteLength = 0;
    char *bytes = extractBytes(s, sLength, "ISO-8859-1", &byteLength);
    UCharsetDetector *csd = ucsdet_open(&status);
    const UCharsetMatch *match;
    const char *lang, *name;

    ucsdet_enableInputFilter(csd, TRUE);

    if (!ucsdet_isInputFilterEnabled(csd)) {
        log_err("ucsdet_enableInputFilter(csd, TRUE) did not enable input filter!\n");
    }


    ucsdet_setText(csd, bytes, byteLength, &status);
    match = ucsdet_detect(csd, &status);

    if (match == NULL) {
        log_err("Turning on the input filter resulted in no matches.\n");
        goto turn_off;
    }

    name = ucsdet_getName(match, &status);

    if (name == NULL || strcmp(name, "ISO-8859-1") != 0) {
        /* name may be NULL on this path; never hand NULL to "%s". */
        log_err("Turning on the input filter resulted in %s rather than ISO-8859-1\n",
                name != NULL ? name : "(null)");
    } else {
        lang = ucsdet_getLanguage(match, &status);

        if (lang == NULL || strcmp(lang, "fr") != 0) {
            log_err("Input filter did not strip markup!\n");
        }
    }

turn_off:
    ucsdet_enableInputFilter(csd, FALSE);
    ucsdet_setText(csd, bytes, byteLength, &status);
    match = ucsdet_detect(csd, &status);

    if (match == NULL) {
        log_err("Turning off the input filter resulted in no matches.\n");
        goto bail;
    }

    name = ucsdet_getName(match, &status);

    if (name == NULL || strcmp(name, "ISO-8859-1") != 0) {
        /* Same NULL protection as in the filtered pass. */
        log_err("Turning off the input filter resulted in %s rather than ISO-8859-1\n",
                name != NULL ? name : "(null)");
    } else {
        lang = ucsdet_getLanguage(match, &status);

        if (lang == NULL || strcmp(lang, "en") != 0) {
            log_err("Unfiltered input did not detect as English!\n");
        }
    }

bail:
    freeBytes(bytes);
    ucsdet_close(csd);
}

/*
 * Error-chaining check: every detector entry point below is handed a status
 * that already holds an error code, together with NULL arguments.  The test
 * passes when the status still holds that same code afterwards.
 */
static void TestChaining(void) {
    const UErrorCode preset = U_USELESS_COLLATOR_ERROR;
    UErrorCode ec = preset;

    ucsdet_open(&ec);
    ucsdet_setText(NULL, NULL, 0, &ec);
    ucsdet_getName(NULL, &ec);
    ucsdet_getConfidence(NULL, &ec);
    ucsdet_getLanguage(NULL, &ec);
    ucsdet_detect(NULL, &ec);
    ucsdet_setDeclaredEncoding(NULL, NULL, 0, &ec);
    ucsdet_detectAll(NULL, NULL, &ec);
    ucsdet_getUChars(NULL, NULL, 0, &ec);
    ucsdet_getUChars(NULL, NULL, 0, &ec);
    ucsdet_close(NULL);

    /* None of the calls above should have touched the status. */
    if (ec == preset) {
        return;
    }

    log_err("Status got changed to %s\n", u_errorName(ec));
}
ICU-4639 Initial version of C test. Clean up error messages. X-SVN-Rev: 19137 2006-02-10 23:49:09 +00:00			`/*`
			`****************************************************************************`
			`* Copyright (c) 2005-2006, International Business Machines Corporation and *`
			`* others. All Rights Reserved. *`
			`****************************************************************************`
			`*/`

			`#include "unicode/utypes.h"`

			`#include "unicode/ucsdet.h"`
			`#include "unicode/ucnv.h"`
			`#include "unicode/ustring.h"`

			`#include "cintltst.h"`

			`#include <stdlib.h>`
			`#include <string.h>`

			`#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])`

			`#define NEW_ARRAY(type,count) (type ) ctst_malloc((count) sizeof(type))`
			`#define DELETE_ARRAY(array)`

			`static void TestConstruction(void);`
			`static void TestUTF8(void);`
			`static void TestUTF16(void);`
			`static void TestC1Bytes(void);`
			`static void TestInputFilter(void);`
ICU-5032 Increase code coverage. X-SVN-Rev: 19431 2006-03-24 23:29:08 +00:00			`static void TestChaining(void);`
ICU-4639 Initial version of C test. Clean up error messages. X-SVN-Rev: 19137 2006-02-10 23:49:09 +00:00
			`void addUCsdetTest(TestNode** root);`

			`void addUCsdetTest(TestNode** root)`
			`{`
ICU-5032 Increase code coverage. X-SVN-Rev: 19431 2006-03-24 23:29:08 +00:00			`addTest(root, &TestConstruction, "ucsdetst/TestConstruction");`
			`addTest(root, &TestUTF8, "ucsdetst/TestUTF8");`
			`addTest(root, &TestUTF16, "ucsdetst/TestUTF16");`
			`addTest(root, &TestC1Bytes, "ucsdetst/TestC1Bytes");`
			`addTest(root, &TestInputFilter, "ucsdetst/TestInputFilter");`
			`addTest(root, &TestChaining, "ucsdetst/TestErrorChaining");`
ICU-4639 Initial version of C test. Clean up error messages. X-SVN-Rev: 19137 2006-02-10 23:49:09 +00:00			`}`

			`static int32_t preflight(const UChar src, int32_t length, UConverter cnv)`
			`{`
			`UErrorCode status;`
			`char buffer[1024];`
			`char dest, destLimit = buffer + sizeof(buffer);`
			`const UChar *srcLimit = src + length;`
			`int32_t result = 0;`

			`do {`
			`dest = buffer;`
			`status = U_ZERO_ERROR;`
			`ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &status);`
			`result += (int32_t) (dest - buffer);`
			`} while (status == U_BUFFER_OVERFLOW_ERROR);`

			`return result;`
			`}`

			`static UChar unescape(const char src, int32_t *length)`
			`{`
			`int32_t charCount = u_unescape(src, NULL, 0);`
			`UChar *chars = NEW_ARRAY(UChar, charCount + 1);`

			`u_unescape(src, chars, charCount);`

			`*length = charCount;`
			`return chars;`
			`}`

			`static char extractBytes(const UChar src, int32_t length, const char codepage, int32_t byteLength)`
			`{`
			`UErrorCode status = U_ZERO_ERROR;`
			`UConverter *cnv = ucnv_open(codepage, &status);`
			`int32_t byteCount = preflight(src, length, cnv);`
			`const UChar *srcLimit = src + length;`
			`char *bytes = NEW_ARRAY(char, byteCount + 1);`
			`char dest = bytes, destLimit = bytes + byteCount + 1;`

			`ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &status);`
			`ucnv_close(cnv);`

			`*byteLength = byteCount;`
			`return bytes;`
			`}`

			`static void freeBytes(char *bytes)`
			`{`
			`DELETE_ARRAY(bytes);`
			`}`

			`static void TestConstruction(void)`
			`{`
			`UErrorCode status = U_ZERO_ERROR;`
			`UCharsetDetector *csd = ucsdet_open(&status);`
			`UEnumeration *e = ucsdet_getAllDetectableCharsets(csd, &status);`
			`int32_t count = uenum_count(e, &status);`
			`int32_t i;`

			`for(i = 0; i < count; i += 1) {`
			`int32_t length;`
			`const char *name = uenum_next(e, &length, &status);`

			`if(name == NULL \|\| length <= 0) {`
			`log_err("ucsdet_getAllDetectableCharsets() returned a null or empty name!\n");`
			`}`
			`}`

			`uenum_close(e);`
			`ucsdet_close(csd);`
			`}`

			`static void TestUTF8(void)`
			`{`
			`UErrorCode status = U_ZERO_ERROR;`
ICU-4707 Fix some compiler warnings. X-SVN-Rev: 19415 2006-03-23 04:11:36 +00:00			`const char *ss = "This is a string with some non-ascii characters that will "`
ICU-4639 Initial version of C test. Clean up error messages. X-SVN-Rev: 19137 2006-02-10 23:49:09 +00:00			`"be converted to UTF-8, then shoved through the detection process. "`
			`"\\u0391\\u0392\\u0393\\u0394\\u0395"`
			`"Sure would be nice if our source could contain Unicode directly!";`
			`int32_t byteLength = 0, sLength = 0, dLength = 0;`
			`UChar *s = unescape(ss, &sLength);`
			`char *bytes = extractBytes(s, sLength, "UTF-8", &byteLength);`
			`UCharsetDetector *csd = ucsdet_open(&status);`
			`const UCharsetMatch *match;`
			`UChar *detected = NEW_ARRAY(UChar, sLength);`

			`ucsdet_setText(csd, bytes, byteLength, &status);`
			`match = ucsdet_detect(csd, &status);`

			`if (match == NULL) {`
			`log_err("Detection failure for UTF-8: got no matches.\n");`
			`goto bail;`
			`}`

			`dLength = ucsdet_getUChars(match, detected, sLength, &status);`

			`if (u_strCompare(detected, dLength, s, sLength, FALSE) != 0) {`
			`log_err("Round-trip test failed!\n");`
			`}`

			`ucsdet_setDeclaredEncoding(csd, "UTF-8", 5, &status); /* for coverage */`

			`bail:`
			`DELETE_ARRAY(detected);`
			`freeBytes(bytes);`
			`ucsdet_close(csd);`
			`}`

			`static void TestUTF16(void)`
			`{`
			`UErrorCode status = U_ZERO_ERROR;`
			`/* Notice the BOM on the start of this string */`
			`UChar chars[] = {`
			`0xFEFF, 0x0623, 0x0648, 0x0631, 0x0648, 0x0628, 0x0627, 0x002C,`
			`0x0020, 0x0628, 0x0631, 0x0645, 0x062c, 0x064a, 0x0627, 0x062a,`
			`0x0020, 0x0627, 0x0644, 0x062d, 0x0627, 0x0633, 0x0648, 0x0628,`
			`0x0020, 0x002b, 0x0020, 0x0627, 0x0646, 0x062a, 0x0631, 0x0646,`
			`0x064a, 0x062a, 0x0000};`
			`int32_t beLength = 0, leLength = 0, cLength = ARRAY_SIZE(chars);`
			`char *beBytes = extractBytes(chars, cLength, "UTF-16BE", &beLength);`
			`char *leBytes = extractBytes(chars, cLength, "UTF-16LE", &leLength);`
			`UCharsetDetector *csd = ucsdet_open(&status);`
			`const UCharsetMatch *match;`
			`const char *name;`
			`int32_t conf;`

			`ucsdet_setText(csd, beBytes, beLength, &status);`
			`match = ucsdet_detect(csd, &status);`

			`if (match == NULL) {`
			`log_err("Encoding detection failure for UTF-16BE: got no matches.\n");`
			`goto try_le;`
			`}`

			`name = ucsdet_getName(match, &status);`
			`conf = ucsdet_getConfidence(match, &status);`

			`if (strcmp(name, "UTF-16BE") != 0) {`
			`log_err("Encoding detection failure for UTF-16BE: got %s\n", name);`
			`}`

			`if (conf != 100) {`
			`log_err("Did not get 100%% confidence for UTF-16BE: got %d\n", conf);`
			`}`

			`try_le:`
			`ucsdet_setText(csd, leBytes, leLength, &status);`
			`match = ucsdet_detect(csd, &status);`

			`if (match == NULL) {`
			`log_err("Encoding detection failure for UTF-16LE: got no matches.\n");`
			`goto bail;`
			`}`

			`name = ucsdet_getName(match, &status);`
			`conf = ucsdet_getConfidence(match, &status);`


			`if (strcmp(name, "UTF-16LE") != 0) {`
			`log_err("Enconding detection failure for UTF-16LE: got %s\n", name);`
			`}`

			`if (conf != 100) {`
			`log_err("Did not get 100%% confidence for UTF-16LE: got %d\n", conf);`
			`}`

			`bail:`
			`freeBytes(leBytes);`
			`freeBytes(beBytes);`
			`ucsdet_close(csd);`
			`}`

			`static void TestC1Bytes(void)`
			`{`
ICU-5282 Fix problems found by uconfigtest. X-SVN-Rev: 19922 2006-07-28 22:58:29 +00:00			`#if !UCONFIG_NO_LEGACY_CONVERSION`
ICU-4639 Initial version of C test. Clean up error messages. X-SVN-Rev: 19137 2006-02-10 23:49:09 +00:00			`UErrorCode status = U_ZERO_ERROR;`
ICU-4707 Fix some compiler warnings. X-SVN-Rev: 19415 2006-03-23 04:11:36 +00:00			`const char *ssISO = "This is a small sample of some English text. Just enough to be sure that it detects correctly.";`
			`const char *ssWindows = "This is another small sample of some English text. Just enough to be sure that it detects correctly. It also includes some \\u201CC1\\u201D bytes.";`
ICU-4639 Initial version of C test. Clean up error messages. X-SVN-Rev: 19137 2006-02-10 23:49:09 +00:00			`int32_t sISOLength = 0, sWindowsLength = 0;`
			`UChar *sISO = unescape(ssISO, &sISOLength);`
			`UChar *sWindows = unescape(ssWindows, &sWindowsLength);`
			`int32_t lISO = 0, lWindows = 0;`
			`char *bISO = extractBytes(sISO, sISOLength, "ISO-8859-1", &lISO);`
			`char *bWindows = extractBytes(sWindows, sWindowsLength, "windows-1252", &lWindows);`
			`UCharsetDetector *csd = ucsdet_open(&status);`
			`const UCharsetMatch *match;`
			`const char *name;`

			`ucsdet_setText(csd, bWindows, lWindows, &status);`
			`match = ucsdet_detect(csd, &status);`

			`if (match == NULL) {`
			`log_err("English test with C1 bytes got no matches.\n");`
			`goto bail;`
			`}`

			`name = ucsdet_getName(match, &status);`

			`if (strcmp(name, "windows-1252") != 0) {`
			`log_err("English text with C1 bytes does not detect as windows-1252, but as %s\n", name);`
			`}`

			`ucsdet_setText(csd, bISO, lISO, &status);`
			`match = ucsdet_detect(csd, &status);`

			`if (match == NULL) {`
			`log_err("English text without C1 bytes got no matches.\n");`
			`goto bail;`
			`}`

			`name = ucsdet_getName(match, &status);`

			`if (strcmp(name, "ISO-8859-1") != 0) {`
			`log_err("English text without C1 bytes does not detect as ISO-8859-1, but as %s\n", name);`
			`}`

			`bail:`
			`freeBytes(bWindows);`
			`freeBytes(bISO);`

			`ucsdet_close(csd);`
ICU-5282 Fix problems found by uconfigtest. X-SVN-Rev: 19922 2006-07-28 22:58:29 +00:00			`#endif`
ICU-4639 Initial version of C test. Clean up error messages. X-SVN-Rev: 19137 2006-02-10 23:49:09 +00:00			`}`

			`static void TestInputFilter(void)`
			`{`
			`UErrorCode status = U_ZERO_ERROR;`
ICU-4707 Fix some compiler warnings. X-SVN-Rev: 19415 2006-03-23 04:11:36 +00:00			`const char *ss = "<a> <lot> <of> <English> <inside> <the> <markup> Un tr\\u00E8s petit peu de Fran\\u00E7ais. <to> <confuse> <the> <detector>";`
ICU-4639 Initial version of C test. Clean up error messages. X-SVN-Rev: 19137 2006-02-10 23:49:09 +00:00			`int32_t sLength = 0;`
			`UChar *s = unescape(ss, &sLength);`
			`int32_t byteLength = 0;`
			`char *bytes = extractBytes(s, sLength, "ISO-8859-1", &byteLength);`
			`UCharsetDetector *csd = ucsdet_open(&status);`
			`const UCharsetMatch *match;`
			`const char lang, name;`

			`ucsdet_enableInputFilter(csd, TRUE);`

			`if (!ucsdet_isInputFilterEnabled(csd)) {`
			`log_err("ucsdet_enableInputFilter(csd, TRUE) did not enable input filter!\n");`
			`}`


			`ucsdet_setText(csd, bytes, byteLength, &status);`
			`match = ucsdet_detect(csd, &status);`

			`if (match == NULL) {`
			`log_err("Turning on the input filter resulted in no matches.\n");`
			`goto turn_off;`
			`}`

			`name = ucsdet_getName(match, &status);`

			`if (name == NULL \|\| strcmp(name, "ISO-8859-1") != 0) {`
			`log_err("Turning on the input filter resulted in %s rather than ISO-8859-1\n", name);`
			`} else {`
			`lang = ucsdet_getLanguage(match, &status);`

			`if (lang == NULL \|\| strcmp(lang, "fr") != 0) {`
			`log_err("Input filter did not strip markup!\n");`
			`}`
			`}`

			`turn_off:`
			`ucsdet_enableInputFilter(csd, FALSE);`
			`ucsdet_setText(csd, bytes, byteLength, &status);`
			`match = ucsdet_detect(csd, &status);`

			`if (match == NULL) {`
			`log_err("Turning off the input filter resulted in no matches.\n");`
			`goto bail;`
			`}`

			`name = ucsdet_getName(match, &status);`

			`if (name == NULL \|\| strcmp(name, "ISO-8859-1") != 0) {`
			`log_err("Turning off the input filter resulted in %s rather than ISO-8859-1\n", name);`
			`} else {`
			`lang = ucsdet_getLanguage(match, &status);`

			`if (lang == NULL \|\| strcmp(lang, "en") != 0) {`
			`log_err("Unfiltered input did not detect as English!\n");`
			`}`
			`}`

			`bail:`
			`freeBytes(bytes);`
			`ucsdet_close(csd);`
			`}`

ICU-5032 Increase code coverage. X-SVN-Rev: 19431 2006-03-24 23:29:08 +00:00			`static void TestChaining(void) {`
			`UErrorCode status = U_USELESS_COLLATOR_ERROR;`

			`ucsdet_open(&status);`
			`ucsdet_setText(NULL, NULL, 0, &status);`
			`ucsdet_getName(NULL, &status);`
			`ucsdet_getConfidence(NULL, &status);`
			`ucsdet_getLanguage(NULL, &status);`
			`ucsdet_detect(NULL, &status);`
			`ucsdet_setDeclaredEncoding(NULL, NULL, 0, &status);`
			`ucsdet_detectAll(NULL, NULL, &status);`
			`ucsdet_getUChars(NULL, NULL, 0, &status);`
			`ucsdet_getUChars(NULL, NULL, 0, &status);`
			`ucsdet_close(NULL);`

			`/* All of this code should have done nothing. */`
			`if (status != U_USELESS_COLLATOR_ERROR) {`
			`log_err("Status got changed to %s\n", u_errorName(status));`
			`}`
			`}`