ICU-8309 Fixed a problem in the Arabic charset detection code that caused an unexpected change to the input data.
X-SVN-Rev: 31534
This commit is contained in:
parent
c1e213af43
commit
f9c9e5b0d7
@ -1,6 +1,6 @@
|
||||
/*
|
||||
****************************************************************************
|
||||
* Copyright (C) 2005-2010, International Business Machines Corporation and *
|
||||
* Copyright (C) 2005-2012, International Business Machines Corporation and *
|
||||
* others. All Rights Reserved. *
|
||||
************************************************************************** *
|
||||
*
|
||||
@ -1178,9 +1178,12 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer {
|
||||
}
|
||||
protected void matchInit(CharsetDetector det)
|
||||
{
|
||||
prev_fInputBytes = det.fInputBytes.clone();
|
||||
byte bb[] = unshape(det.fInputBytes);
|
||||
det.setText(bb);
|
||||
assert prev_fInputBytes == null;
|
||||
prev_fInputBytes = new byte[det.fInputLen];
|
||||
System.arraycopy(det.fInputBytes, 0, prev_fInputBytes, 0, det.fInputLen);
|
||||
byte bb[] = unshape(prev_fInputBytes);
|
||||
System.arraycopy(bb, 0, det.fInputBytes, 0, bb.length);
|
||||
det.fInputLen = bb.length;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1225,8 +1228,11 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer {
|
||||
}
|
||||
|
||||
protected void matchFinish(CharsetDetector det) {
|
||||
if (prev_fInputBytes != null)
|
||||
det.setText(prev_fInputBytes);
|
||||
if (prev_fInputBytes != null) {
|
||||
System.arraycopy(prev_fInputBytes, 0, det.fInputBytes, 0, prev_fInputBytes.length);
|
||||
det.fInputLen = prev_fInputBytes.length;
|
||||
prev_fInputBytes = null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -723,6 +723,9 @@ public class TestCharsetDetector extends TestFmwk
|
||||
detector.setText(data2);
|
||||
CharsetMatch match2 = detector.detect();
|
||||
assertEquals("Expected ISO-8859-1, even though that isn't strictly correct", "ISO-8859-1", match2.getName());
|
||||
// calling detect() one more time without changing the input data
|
||||
CharsetMatch match2a = detector.detect();
|
||||
assertEquals("[second]Expected ISO-8859-1, even though that isn't strictly correct", "ISO-8859-1", match2a.getName());
|
||||
}
|
||||
|
||||
private static byte[] createData1()
|
||||
@ -1019,8 +1022,8 @@ public class TestCharsetDetector extends TestFmwk
|
||||
private static byte[] createData2()
|
||||
{
|
||||
return bytesFromString("0A D0 A1 CA B1 20 3B 3B 20 48 6F 75 72 28 73 29 0A D0 C7 C6 DA 20 3B 3B 20 57 " +
|
||||
"65 65 6B 28 73 29 0A B5 B1 B5 D8 20 CA B1 BC E4 20 3B 3B 20 6C 6F 63 61 6C 20 " +
|
||||
"74 69 6D 65 0A");
|
||||
"65 65 6B 28 73 29 0A B5 B1 B5 D8 20 CA B1 BC E4 20 3B 3B 20 6C 6F 63 61 6C 20 " +
|
||||
"74 69 6D 65 0A");
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user