ICU-8309 Fixed a problem in the Arabic charset detection code that caused the input data to be changed unexpectedly.

X-SVN-Rev: 31534
This commit is contained in:
Yoshito Umaoka 2012-02-28 21:23:57 +00:00
parent c1e213af43
commit f9c9e5b0d7
2 changed files with 17 additions and 8 deletions

View File

@ -1,6 +1,6 @@
/*
****************************************************************************
* Copyright (C) 2005-2010, International Business Machines Corporation and *
* Copyright (C) 2005-2012, International Business Machines Corporation and *
* others. All Rights Reserved. *
************************************************************************** *
*
@ -1178,9 +1178,12 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer {
}
/*
 * Prepares the detector's input for matching: a copy of the current input
 * bytes is kept in prev_fInputBytes, and the detector is then given the
 * result of unshape() to work on. matchFinish() later puts the saved bytes
 * back.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers. Going
 * by the hunk header (9 lines before, 12 after), the first three statements
 * look like the removed version and the six that follow look like their
 * replacement - confirm against the real file before reading the body as
 * one sequence.
 */
protected void matchInit(CharsetDetector det)
{
// Earlier form: clone the entire fInputBytes array, unshape the detector's
// own array, and hand the result to det.setText().
// NOTE(review): presumably this is the path the commit message blames for
// the unexpected input data change - verify against setText().
prev_fInputBytes = det.fInputBytes.clone();
byte bb[] = unshape(det.fInputBytes);
det.setText(bb);
// Later form: no earlier save may still be pending (only checked when
// assertions are enabled).
assert prev_fInputBytes == null;
// Save just the first fInputLen bytes rather than the whole array.
prev_fInputBytes = new byte[det.fInputLen];
System.arraycopy(det.fInputBytes, 0, prev_fInputBytes, 0, det.fInputLen);
// Unshape the saved copy, then overwrite the start of the detector's
// existing array with the result instead of calling setText().
byte bb[] = unshape(prev_fInputBytes);
System.arraycopy(bb, 0, det.fInputBytes, 0, bb.length);
// The detector's input length now matches the unshaped data.
det.fInputLen = bb.length;
}
/*
@ -1225,8 +1228,11 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer {
}
/*
 * Gives the detector back the input bytes that were saved in
 * prev_fInputBytes, if there are any.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers. Going
 * by the hunk header (8 lines before, 11 after), the unbraced "if" looks
 * like the removed version and the braced "if" looks like its replacement -
 * confirm against the real file.
 */
protected void matchFinish(CharsetDetector det) {
// Earlier form: restore by passing the saved array to det.setText().
if (prev_fInputBytes != null)
det.setText(prev_fInputBytes);
// Later form: copy the saved bytes back into the detector's own array,
// reset its length to the saved length, and drop the saved reference so
// that the "prev_fInputBytes == null" assert in matchInit() holds on the
// next call.
if (prev_fInputBytes != null) {
System.arraycopy(prev_fInputBytes, 0, det.fInputBytes, 0, prev_fInputBytes.length);
det.fInputLen = prev_fInputBytes.length;
prev_fInputBytes = null;
}
}
}

View File

@ -723,6 +723,9 @@ public class TestCharsetDetector extends TestFmwk
detector.setText(data2);
CharsetMatch match2 = detector.detect();
assertEquals("Expected ISO-8859-1, even though that isn't strictly correct", "ISO-8859-1", match2.getName());
// calling detect() one more time without changing the input data
CharsetMatch match2a = detector.detect();
assertEquals("[second]Expected ISO-8859-1, even though that isn't strictly correct", "ISO-8859-1", match2a.getName());
}
private static byte[] createData1()
@ -1019,8 +1022,8 @@ public class TestCharsetDetector extends TestFmwk
/*
 * Returns whatever bytesFromString() produces for a fixed string of
 * space-separated two-digit hex values.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers. The
 * last two literal lines appear twice; they look like the old and new
 * copies of the same text, so the change was presumably whitespace only -
 * confirm against the real file.
 */
private static byte[] createData2()
{
return bytesFromString("0A D0 A1 CA B1 20 3B 3B 20 48 6F 75 72 28 73 29 0A D0 C7 C6 DA 20 3B 3B 20 57 " +
"65 65 6B 28 73 29 0A B5 B1 B5 D8 20 CA B1 BC E4 20 3B 3B 20 6C 6F 63 61 6C 20 " +
"74 69 6D 65 0A");
"65 65 6B 28 73 29 0A B5 B1 B5 D8 20 CA B1 BC E4 20 3B 3B 20 6C 6F 63 61 6C 20 " +
"74 69 6D 65 0A");
}