ICU-8309 Fixed a problem in the Arabic charset detection code that caused an unexpected change to the input data.

X-SVN-Rev: 31534
2012-02-28 21:23:57 +00:00 · 2012-02-28 21:23:57 +00:00 · f9c9e5b0d7
commit f9c9e5b0d7
parent c1e213af43
2 changed files with 17 additions and 8 deletions
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_sbcs.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CharsetRecog_sbcs.java
@ -1,6 +1,6 @@
 /*
 ****************************************************************************
- * Copyright (C) 2005-2010, International Business Machines Corporation and *
+ * Copyright (C) 2005-2012, International Business Machines Corporation and *
 * others. All Rights Reserved.                                             *
 ************************************************************************** *
 *
@ -1178,9 +1178,12 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer {
        }
        protected void matchInit(CharsetDetector det) 
        {
-            prev_fInputBytes = det.fInputBytes.clone();
-            byte bb[] = unshape(det.fInputBytes);
-            det.setText(bb);
+            assert prev_fInputBytes == null;
+            prev_fInputBytes = new byte[det.fInputLen];
+            System.arraycopy(det.fInputBytes, 0, prev_fInputBytes, 0, det.fInputLen);
+            byte bb[] = unshape(prev_fInputBytes);
+            System.arraycopy(bb, 0, det.fInputBytes, 0, bb.length);
+            det.fInputLen = bb.length;
        }
        
        /*
@ -1225,8 +1228,11 @@ abstract class CharsetRecog_sbcs extends CharsetRecognizer {
        }
        
        protected void matchFinish(CharsetDetector det) {
-            if (prev_fInputBytes != null)
-                det.setText(prev_fInputBytes);
+            if (prev_fInputBytes != null) {
+                System.arraycopy(prev_fInputBytes, 0, det.fInputBytes, 0, prev_fInputBytes.length);
+                det.fInputLen = prev_fInputBytes.length;
+                prev_fInputBytes = null;
+            }
        }
        
    }
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/charsetdet/TestCharsetDetector.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/charsetdet/TestCharsetDetector.java
@ -723,6 +723,9 @@ public class TestCharsetDetector extends TestFmwk
          detector.setText(data2);
          CharsetMatch match2 = detector.detect();
          assertEquals("Expected ISO-8859-1, even though that isn't strictly correct", "ISO-8859-1", match2.getName());
+          // calling detect() one more time without changing the input data
+          CharsetMatch match2a = detector.detect();
+          assertEquals("[second]Expected ISO-8859-1, even though that isn't strictly correct", "ISO-8859-1", match2a.getName());
      }
  
      private static byte[] createData1()
@ -1019,8 +1022,8 @@ public class TestCharsetDetector extends TestFmwk
      private static byte[] createData2()
      {
          return bytesFromString("0A D0 A1 CA B1 20 3B 3B 20 48 6F 75 72 28 73 29 0A D0 C7 C6 DA 20 3B 3B 20 57 " +
-          		                 "65 65 6B 28 73 29 0A B5 B1 B5 D8 20 CA B1 BC E4 20 3B 3B 20 6C 6F 63 61 6C 20 " +
-          		                 "74 69 6D 65 0A");
+                                 "65 65 6B 28 73 29 0A B5 B1 B5 D8 20 CA B1 BC E4 20 3B 3B 20 6C 6F 63 61 6C 20 " +
+                                 "74 69 6D 65 0A");
      }