ICU-47

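Allow the end-of-string position as a valid offset: findCodePointOffset now accepts offset16 == source.length(), findOffsetFromCodePoint accepts an offset32 equal to the number of code points, and countCodePoint returns 0 for a null or empty string. X-SVN-Rev: 5566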
2001-08-23 02:21:07 +00:00 · 2001-08-23 02:21:07 +00:00 · 26d5e85247
commit 26d5e85247
parent f9081a2e8e
4 changed files with 142 additions and 20 deletions
--- a/icu4j/src/com/ibm/icu/dev/test/lang/UTF16Test.java
+++ b/icu4j/src/com/ibm/icu/dev/test/lang/UTF16Test.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/UTF16Test.java,v $ 
-* $Date: 2001/03/23 19:52:03 $ 
-* $Revision: 1.5 $
+* $Date: 2001/08/23 02:20:59 $ 
+* $Revision: 1.6 $
 *
 *******************************************************************************
 */
@ -209,6 +209,59 @@ public final class UTF16Test extends TestFmwk
      errln("FAIL Setting non-supplementary characters at a surrogate " +
            "position");
  }
+  
+  /**
+  * Testing countCodePoint, findOffsetFromCodePoint and findCodePointOffset
+  */
+  public void TestUTF16CodePointOffset()
+  {
+    // jitterbug 47
+    String str = "a\uD800\uDC00b";
+    if (UTF16.findCodePointOffset(str, 0) != 0 ||
+        UTF16.findOffsetFromCodePoint(str, 0) != 0) {
+        errln("FAIL Getting the first codepoint offset to a string with " +
+              "supplementary characters");
+    }
+    if (UTF16.findCodePointOffset(str, 1) != 1 ||
+        UTF16.findOffsetFromCodePoint(str, 1) != 1) {
+        errln("FAIL Getting the second codepoint offset to a string with " +
+              "supplementary characters");
+    }
+    if (UTF16.findCodePointOffset(str, 2) != 1 ||
+        UTF16.findOffsetFromCodePoint(str, 2) != 3) {
+        errln("FAIL Getting the third codepoint offset to a string with " +
+              "supplementary characters");
+    }
+    if (UTF16.findCodePointOffset(str, 3) != 2 ||
+        UTF16.findOffsetFromCodePoint(str, 3) != 4) {
+        errln("FAIL Getting the last codepoint offset to a string with " +
+              "supplementary characters");
+    }
+    if (UTF16.findCodePointOffset(str, 4) != 3) {
+        errln("FAIL Getting the length offset to a string with " +
+              "supplementary characters");
+    }
+    try {
+        UTF16.findCodePointOffset(str, 5);
+        errln("FAIL Getting the a non-existence codepoint to a string with " +
+              "supplementary characters");
+    } catch (Exception e) {
+        // this is a success
+        logln("Passed out of bounds codepoint offset");
+    }
+    try {
+        UTF16.findOffsetFromCodePoint(str, 4);
+        errln("FAIL Getting the a non-existence codepoint to a string with " +
+              "supplementary characters");
+    } catch (Exception e) {
+        // this is a success
+        logln("Passed out of bounds codepoint offset");
+    }
+    if (UTF16.countCodePoint(str) != 3) {
+        errln("FAIL Counting the number of codepoints in a string with " +
+              "supplementary characters");
+    }
+  }
 
  public static void main(String[] arg)
  {
--- a/icu4j/src/com/ibm/icu/test/text/UTF16Test.java
+++ b/icu4j/src/com/ibm/icu/test/text/UTF16Test.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/test/text/Attic/UTF16Test.java,v $ 
-* $Date: 2001/03/23 19:52:03 $ 
-* $Revision: 1.5 $
+* $Date: 2001/08/23 02:20:59 $ 
+* $Revision: 1.6 $
 *
 *******************************************************************************
 */
@ -209,6 +209,59 @@ public final class UTF16Test extends TestFmwk
      errln("FAIL Setting non-supplementary characters at a surrogate " +
            "position");
  }
+  
+  /**
+  * Testing countCodePoint, findOffsetFromCodePoint and findCodePointOffset
+  */
+  public void TestUTF16CodePointOffset()
+  {
+    // jitterbug 47
+    String str = "a\uD800\uDC00b";
+    if (UTF16.findCodePointOffset(str, 0) != 0 ||
+        UTF16.findOffsetFromCodePoint(str, 0) != 0) {
+        errln("FAIL Getting the first codepoint offset to a string with " +
+              "supplementary characters");
+    }
+    if (UTF16.findCodePointOffset(str, 1) != 1 ||
+        UTF16.findOffsetFromCodePoint(str, 1) != 1) {
+        errln("FAIL Getting the second codepoint offset to a string with " +
+              "supplementary characters");
+    }
+    if (UTF16.findCodePointOffset(str, 2) != 1 ||
+        UTF16.findOffsetFromCodePoint(str, 2) != 3) {
+        errln("FAIL Getting the third codepoint offset to a string with " +
+              "supplementary characters");
+    }
+    if (UTF16.findCodePointOffset(str, 3) != 2 ||
+        UTF16.findOffsetFromCodePoint(str, 3) != 4) {
+        errln("FAIL Getting the last codepoint offset to a string with " +
+              "supplementary characters");
+    }
+    if (UTF16.findCodePointOffset(str, 4) != 3) {
+        errln("FAIL Getting the length offset to a string with " +
+              "supplementary characters");
+    }
+    try {
+        UTF16.findCodePointOffset(str, 5);
+        errln("FAIL Getting the a non-existence codepoint to a string with " +
+              "supplementary characters");
+    } catch (Exception e) {
+        // this is a success
+        logln("Passed out of bounds codepoint offset");
+    }
+    try {
+        UTF16.findOffsetFromCodePoint(str, 4);
+        errln("FAIL Getting the a non-existence codepoint to a string with " +
+              "supplementary characters");
+    } catch (Exception e) {
+        // this is a success
+        logln("Passed out of bounds codepoint offset");
+    }
+    if (UTF16.countCodePoint(str) != 3) {
+        errln("FAIL Counting the number of codepoints in a string with " +
+              "supplementary characters");
+    }
+  }
 
  public static void main(String[] arg)
  {
--- a/icu4j/src/com/ibm/icu/text/UTF16.java
+++ b/icu4j/src/com/ibm/icu/text/UTF16.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UTF16.java,v $ 
-* $Date: 2001/07/25 20:49:30 $ 
-* $Revision: 1.5 $
+* $Date: 2001/08/23 02:21:07 $ 
+* $Revision: 1.6 $
 *
 *******************************************************************************
 */
@ -430,7 +430,7 @@ public final class UTF16
      count --;
      result ++;
    }
-    if (result >= size) {
+    if (count != 0) {
      throw new StringIndexOutOfBoundsException(offset32);
    }
    return result;
@ -457,7 +457,7 @@ public final class UTF16
  */
  public static int findCodePointOffset(String source, int offset16) 
  {
-    if (offset16 < 0 || offset16 >= source.length()) {
+    if (offset16 < 0 || offset16 > source.length()) {
      throw new StringIndexOutOfBoundsException(offset16);
    }
     
@ -477,9 +477,14 @@ public final class UTF16
        ++ result;                          // count others as 1
      }
    }
-    // end of source being a supplementary character
+    
+    if (offset16 == source.length()) {
+        return result;
+    }
+    
+    // offset16 points at the trail (low) surrogate of a surrogate pair:
    // shift result back to the start of the supplementary character
-    if (hadLeadSurrogate && isTrailSurrogate(source.charAt(offset16))) {
+    if (hadLeadSurrogate && (isTrailSurrogate(source.charAt(offset16)))) {
      result --;
    }
      
@ -593,7 +598,10 @@ public final class UTF16
  */
  public static int countCodePoint(String s)
  {
-    return findCodePointOffset(s, s.length() - 1) + 1;
+    if (s == null || s.length() == 0) {
+        return 0;
+    }
+    return findCodePointOffset(s, s.length());
  }
  
  /**
@ -603,7 +611,7 @@ public final class UTF16
  * @param char32 code point
  */
  public static void setCharAtCodePointOffset(StringBuffer str, int offset32, 
-                                       int char32)
+                                              int char32)
  {
    int offset16 = findOffsetFromCodePoint(str.toString(), offset32);
    setCharAt(str, offset16, char32);
--- a/icu4j/src/com/ibm/text/UTF16.java
+++ b/icu4j/src/com/ibm/text/UTF16.java
@ -5,8 +5,8 @@
 *******************************************************************************
 *
 * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UTF16.java,v $ 
-* $Date: 2001/07/25 20:49:30 $ 
-* $Revision: 1.5 $
+* $Date: 2001/08/23 02:21:07 $ 
+* $Revision: 1.6 $
 *
 *******************************************************************************
 */
@ -430,7 +430,7 @@ public final class UTF16
      count --;
      result ++;
    }
-    if (result >= size) {
+    if (count != 0) {
      throw new StringIndexOutOfBoundsException(offset32);
    }
    return result;
@ -457,7 +457,7 @@ public final class UTF16
  */
  public static int findCodePointOffset(String source, int offset16) 
  {
-    if (offset16 < 0 || offset16 >= source.length()) {
+    if (offset16 < 0 || offset16 > source.length()) {
      throw new StringIndexOutOfBoundsException(offset16);
    }
     
@ -477,9 +477,14 @@ public final class UTF16
        ++ result;                          // count others as 1
      }
    }
-    // end of source being a supplementary character
+    
+    if (offset16 == source.length()) {
+        return result;
+    }
+    
+    // offset16 points at the trail (low) surrogate of a surrogate pair:
    // shift result back to the start of the supplementary character
-    if (hadLeadSurrogate && isTrailSurrogate(source.charAt(offset16))) {
+    if (hadLeadSurrogate && (isTrailSurrogate(source.charAt(offset16)))) {
      result --;
    }
      
@ -593,7 +598,10 @@ public final class UTF16
  */
  public static int countCodePoint(String s)
  {
-    return findCodePointOffset(s, s.length() - 1) + 1;
+    if (s == null || s.length() == 0) {
+        return 0;
+    }
+    return findCodePointOffset(s, s.length());
  }
  
  /**
@ -603,7 +611,7 @@ public final class UTF16
  * @param char32 code point
  */
  public static void setCharAtCodePointOffset(StringBuffer str, int offset32, 
-                                       int char32)
+                                              int char32)
  {
    int offset16 = findOffsetFromCodePoint(str.toString(), offset32);
    setCharAt(str, offset16, char32);