ICU-6778 Update intltest csdet.
X-SVN-Rev: 25770
This commit is contained in:
parent
7fc8dc6571
commit
e526f90477
@ -546,9 +546,36 @@ void CharsetDetectionTest::IBM424Test()
|
||||
0x05D8, 0x05EA, 0x05D5, 0x0020, 0x05DC, 0x05D0, 0x05D7, 0x05E8, 0x0020, 0x05E9, 0x05E2, 0x05D9, 0x05D9, 0x05DF, 0x0020, 0x05D1,
|
||||
0x05EA, 0x05DE, 0x05DC, 0x05D9, 0x05DC, 0x0020, 0x05D4, 0x05E2, 0x05D3, 0x05D5, 0x05D9, 0x05D5, 0x05EA, 0x0000
|
||||
};
|
||||
UnicodeString s(chars);
|
||||
int32_t bLength = 0;
|
||||
char *bytes = extractBytes(s, "IBM424", bLength);
|
||||
|
||||
static const UChar chars_reverse[] = {
|
||||
0x05EA, 0x05D5, 0x05D9, 0x05D5, 0x05D3, 0x05E2, 0x05D4, 0x0020, 0x05DC, 0x05D9, 0x05DC, 0x05DE, 0x05EA,
|
||||
0x05D1, 0x0020, 0x05DF, 0x05D9, 0x05D9, 0x05E2, 0x05E9, 0x0020, 0x05E8, 0x05D7, 0x05D0, 0x05DC, 0x0020, 0x05D5, 0x05EA, 0x05D8,
|
||||
0x05DC, 0x05D7, 0x05D4, 0x0020, 0x05EA, 0x05D0, 0x0020, 0x05DC, 0x05D1, 0x05D9, 0x05E7, 0x0020, 0x05D8, 0x05D9, 0x05DC, 0x05D1,
|
||||
0x05DC, 0x05D3, 0x05E0, 0x05DE, 0x0020, 0x002E, 0x0022, 0x05D4, 0x05E7, 0x05D5, 0x05E6, 0x05D9, 0x0020, 0x05EA, 0x05E8, 0x05E4,
|
||||
0x05D5, 0x05E2, 0x0020, 0x05E2, 0x05E6, 0x05D1, 0x05DE, 0x0020, 0x05DA, 0x05DC, 0x05D4, 0x05DE, 0x05D1, 0x0020, 0x05DD, 0x05D9,
|
||||
0x05DC, 0x05D9, 0x05D9, 0x05D7, 0x0020, 0x05DC, 0x05E9, 0x0020, 0x05D4, 0x05E8, 0x05D5, 0x05D0, 0x05DB, 0x05DC, 0x0020, 0x05D4,
|
||||
0x05DC, 0x05D5, 0x05E1, 0x05E4, 0x0020, 0x05EA, 0x05D5, 0x05D2, 0x05D4, 0x05E0, 0x05EA, 0x05D4, 0x0022, 0x0020, 0x05DC, 0x05E9,
|
||||
0x0020, 0x05D4, 0x05E0, 0x05D5, 0x05DE, 0x05EA, 0x0020, 0x05D4, 0x05DC, 0x05D5, 0x05E2, 0x0020, 0x05EA, 0x05D5, 0x05D9, 0x05D5,
|
||||
0x05D3, 0x05E2, 0x05D4, 0x05DE, 0x0020, 0x002C, 0x05E8, 0x0022, 0x05E6, 0x05E4, 0x05D4, 0x0020, 0x05D9, 0x05E8, 0x05D1, 0x05D3,
|
||||
0x05DC, 0x0020, 0x002E, 0x05D4, 0x05D6, 0x05E2, 0x0020, 0x05EA, 0x05E2, 0x05D5, 0x05E6, 0x05E8, 0x0020, 0x002B, 0x05D1, 0x0020,
|
||||
0x05D4, 0x05E7, 0x05D5, 0x05E6, 0x05D9, 0x0020, 0x05EA, 0x05E8, 0x05E4, 0x05D5, 0x05E2, 0x0020, 0x05E2, 0x05E6, 0x05D1, 0x05DE,
|
||||
0x05DE, 0x0020, 0x05DC, 0x0022, 0x05D4, 0x05E6, 0x0020, 0x05D9, 0x05DC, 0x05D9, 0x05D9, 0x05D7, 0x0020, 0x05EA, 0x05D5, 0x05D9,
|
||||
0x05D5, 0x05D3, 0x05E2, 0x0020, 0x05EA, 0x05D5, 0x05D1, 0x05E7, 0x05E2, 0x05D1, 0x0020, 0x05D7, 0x0022, 0x05E6, 0x05DE, 0x0020,
|
||||
0x05EA, 0x05E8, 0x05D9, 0x05E7, 0x05D7, 0x0020, 0x05EA, 0x05D7, 0x05D9, 0x05EA, 0x05E4, 0x0020, 0x05DC, 0x05E2, 0x0020, 0x05D4,
|
||||
0x05E8, 0x05D5, 0x05D4, 0x0020, 0x002C, 0x05D8, 0x05D9, 0x05DC, 0x05D1, 0x05DC, 0x05D3, 0x05E0, 0x05DE, 0x0020, 0x05D9, 0x05D7,
|
||||
0x05D9, 0x05D1, 0x05D0, 0x0020, 0x05E3, 0x05D5, 0x05DC, 0x05D0, 0x0020, 0x05EA, 0x05EA, 0x0020, 0x002C, 0x05D9, 0x05E9, 0x05D0,
|
||||
0x05E8, 0x05D4, 0x0020, 0x05D9, 0x05D0, 0x05D1, 0x05E6, 0x05D4, 0x0020, 0x05D8, 0x05D9, 0x05DC, 0x05E7, 0x05E8, 0x05E4, 0x05D4,
|
||||
0x0000
|
||||
};
|
||||
|
||||
int32_t bLength = 0, brLength = 0;
|
||||
|
||||
UnicodeString s1(chars);
|
||||
UnicodeString s2(chars_reverse);
|
||||
|
||||
char *bytes = extractBytes(s1, "IBM424", bLength);
|
||||
char *bytes_r = extractBytes(s2, "IBM424", brLength);
|
||||
|
||||
UCharsetDetector *csd = ucsdet_open(&status);
|
||||
const UCharsetMatch *match;
|
||||
const char *name;
|
||||
@ -557,17 +584,31 @@ void CharsetDetectionTest::IBM424Test()
|
||||
match = ucsdet_detect(csd, &status);
|
||||
|
||||
if (match == NULL) {
|
||||
errln("Encoding detection failure for IBM424: got no matches.\n");
|
||||
errln("Encoding detection failure for IBM424_rtl: got no matches.\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
name = ucsdet_getName(match, &status);
|
||||
if (strcmp(name, "IBM424") != 0) {
|
||||
errln("Encoding detection failure for IBM424: got %s\n", name);
|
||||
if (strcmp(name, "IBM424_rtl") != 0) {
|
||||
errln("Encoding detection failure for IBM424_rtl: got %s\n", name);
|
||||
}
|
||||
|
||||
ucsdet_setText(csd, bytes_r, brLength, &status);
|
||||
match = ucsdet_detect(csd, &status);
|
||||
|
||||
if (match == NULL) {
|
||||
errln("Encoding detection failure for IBM424_ltr: got no matches.\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
name = ucsdet_getName(match, &status);
|
||||
if (strcmp(name, "IBM424_ltr") != 0) {
|
||||
errln("Encoding detection failure for IBM424_ltr: got %s\n", name);
|
||||
}
|
||||
|
||||
bail:
|
||||
freeBytes(bytes);
|
||||
freeBytes(bytes_r);
|
||||
ucsdet_close(csd);
|
||||
}
|
||||
|
||||
@ -591,9 +632,31 @@ void CharsetDetectionTest::IBM420Test()
|
||||
0x062F, 0x064A, 0x0629, 0x0020, 0x0648, 0x0627, 0x0644, 0x0627, 0x062C, 0x062A, 0x0645, 0x0627, 0x0639, 0x064A, 0x0629, 0x002E,
|
||||
0x0000
|
||||
};
|
||||
UnicodeString s(chars);
|
||||
int32_t bLength = 0;
|
||||
char *bytes = extractBytes(s, "IBM420", bLength);
|
||||
static const UChar chars_reverse[] = {
|
||||
0x002E, 0x0629, 0x064A, 0x0639, 0x0627, 0x0645, 0x062A, 0x062C, 0x0627, 0x0644, 0x0627, 0x0648, 0x0020, 0x0629, 0x064A, 0x062F,
|
||||
0x0627, 0x0635, 0x062A, 0x0642, 0x0627, 0x0644, 0x0627, 0x0020, 0x0631, 0x0637, 0x0627, 0x062E, 0x0645, 0x0644, 0x0627, 0x0020,
|
||||
0x0647, 0x062C, 0x0648, 0x0628, 0x0020, 0x0644, 0x064A, 0x0626, 0x0627, 0x0631, 0x0633, 0x0627, 0x0020, 0x0629, 0x0644, 0x0648,
|
||||
0x062F, 0x0020, 0x0646, 0x0627, 0x0643, 0x0633, 0x0644, 0x0020, 0x0646, 0x0627, 0x0645, 0x0623, 0x0020, 0x0629, 0x0643, 0x0628,
|
||||
0x0634, 0x0020, 0x0646, 0x0627, 0x0645, 0x0636, 0x0020, 0x0641, 0x062F, 0x0647, 0x0628, 0x0020, 0x0629, 0x0633, 0x0633, 0x0624,
|
||||
0x0645, 0x0644, 0x0627, 0x0020, 0x062A, 0x0631, 0x0648, 0x0637, 0x062A, 0x0648, 0x0020, 0x062A, 0x0639, 0x0633, 0x0648, 0x062A,
|
||||
0x0020, 0x002E, 0x0629, 0x0644, 0x0648, 0x062F, 0x0644, 0x0644, 0x0648, 0x0020, 0x0639, 0x0645, 0x062A, 0x062C, 0x0645, 0x0644,
|
||||
0x0644, 0x0020, 0x0629, 0x0631, 0x064A, 0x063A, 0x062A, 0x0645, 0x0644, 0x0627, 0x0020, 0x062A, 0x0627, 0x062C, 0x0627, 0x064A,
|
||||
0x062A, 0x062D, 0x0627, 0x0644, 0x0644, 0x0020, 0x064B, 0x0627, 0x0645, 0x0626, 0x0627, 0x062F, 0x0020, 0x0627, 0x0647, 0x062A,
|
||||
0x0645, 0x0626, 0x0627, 0x0644, 0x0645, 0x0020, 0x0639, 0x0645, 0x0020, 0x002C, 0x064A, 0x0646, 0x0637, 0x0648, 0x0644, 0x0627,
|
||||
0x0020, 0x0646, 0x064A, 0x0645, 0x0623, 0x062A, 0x0644, 0x0627, 0x0020, 0x0629, 0x0633, 0x0633, 0x0624, 0x0645, 0x0020, 0x064A,
|
||||
0x0641, 0x0020, 0x0629, 0x062F, 0x064A, 0x062F, 0x0639, 0x0020, 0x0646, 0x064A, 0x0645, 0x0623, 0x062A, 0x0020, 0x062C, 0x0645,
|
||||
0x0627, 0x0631, 0x0628, 0x0020, 0x062A, 0x0630, 0x0641, 0x064F, 0x0646, 0x0648, 0x0020, 0x062A, 0x0639, 0x0636, 0x064F, 0x0648,
|
||||
0x0000,
|
||||
};
|
||||
|
||||
int32_t bLength = 0, brLength = 0;
|
||||
|
||||
UnicodeString s1(chars);
|
||||
UnicodeString s2(chars_reverse);
|
||||
|
||||
char *bytes = extractBytes(s1, "IBM420", bLength);
|
||||
char *bytes_r = extractBytes(s2, "IBM420", brLength);
|
||||
|
||||
UCharsetDetector *csd = ucsdet_open(&status);
|
||||
const UCharsetMatch *match;
|
||||
const char *name;
|
||||
@ -602,16 +665,30 @@ void CharsetDetectionTest::IBM420Test()
|
||||
match = ucsdet_detect(csd, &status);
|
||||
|
||||
if (match == NULL) {
|
||||
errln("Encoding detection failure for IBM420: got no matches.\n");
|
||||
errln("Encoding detection failure for IBM420_rtl: got no matches.\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
name = ucsdet_getName(match, &status);
|
||||
if (strcmp(name, "IBM420") != 0) {
|
||||
errln("Encoding detection failure for IBM420: got %s\n", name);
|
||||
if (strcmp(name, "IBM420_rtl") != 0) {
|
||||
errln("Encoding detection failure for IBM420_rtl: got %s\n", name);
|
||||
}
|
||||
|
||||
ucsdet_setText(csd, bytes_r, brLength, &status);
|
||||
match = ucsdet_detect(csd, &status);
|
||||
|
||||
if (match == NULL) {
|
||||
errln("Encoding detection failure for IBM420_ltr: got no matches.\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
name = ucsdet_getName(match, &status);
|
||||
if (strcmp(name, "IBM420_ltr") != 0) {
|
||||
errln("Encoding detection failure for IBM420_ltr: got %s\n", name);
|
||||
}
|
||||
|
||||
bail:
|
||||
freeBytes(bytes);
|
||||
freeBytes(bytes_r);
|
||||
ucsdet_close(csd);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user