Allowed string length as an offset in findOffsetFromCodePoint,
findCodePointOffset and countCodePoint.

X-SVN-Rev: 5566
This commit is contained in:
Syn Wee Quek 2001-08-23 02:21:07 +00:00
parent f9081a2e8e
commit 26d5e85247
4 changed files with 142 additions and 20 deletions

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/UTF16Test.java,v $
* $Date: 2001/03/23 19:52:03 $
* $Revision: 1.5 $
* $Date: 2001/08/23 02:20:59 $
* $Revision: 1.6 $
*
*******************************************************************************
*/
@ -210,6 +210,59 @@ public final class UTF16Test extends TestFmwk
"position");
}
/**
 * Testing countCodePoint, findOffsetFromCodePoint and findCodePointOffset
 * on a string that holds one supplementary character (a surrogate pair)
 * between two BMP characters.
 */
public void TestUTF16CodePointOffset()
{
    // jitterbug 47
    // UTF-16 offsets 0..4 are expected to map to code point offsets
    // 0, 1, 1, 2, 3: offsets 1 and 2 are the two halves of the surrogate
    // pair and offset 4 is the string length.
    String str = "a\uD800\uDC00b";
    if (UTF16.findCodePointOffset(str, 0) != 0 ||
        UTF16.findOffsetFromCodePoint(str, 0) != 0) {
        errln("FAIL Getting the first codepoint offset to a string with " +
              "supplementary characters");
    }
    if (UTF16.findCodePointOffset(str, 1) != 1 ||
        UTF16.findOffsetFromCodePoint(str, 1) != 1) {
        errln("FAIL Getting the second codepoint offset to a string with " +
              "supplementary characters");
    }
    if (UTF16.findCodePointOffset(str, 2) != 1 ||
        UTF16.findOffsetFromCodePoint(str, 2) != 3) {
        errln("FAIL Getting the third codepoint offset to a string with " +
              "supplementary characters");
    }
    if (UTF16.findCodePointOffset(str, 3) != 2 ||
        UTF16.findOffsetFromCodePoint(str, 3) != 4) {
        errln("FAIL Getting the last codepoint offset to a string with " +
              "supplementary characters");
    }
    // the string length itself is a legal UTF-16 offset
    if (UTF16.findCodePointOffset(str, 4) != 3) {
        errln("FAIL Getting the length offset to a string with " +
              "supplementary characters");
    }
    // One past the string length has to be rejected. Only the bounds
    // exception counts as a pass; anything else propagates instead of
    // being logged as a success.
    try {
        UTF16.findCodePointOffset(str, 5);
        errln("FAIL findCodePointOffset getting a non-existent codepoint " +
              "to a string with supplementary characters");
    } catch (StringIndexOutOfBoundsException e) {
        // this is a success
        logln("Passed out of bounds codepoint offset");
    }
    // The string has 3 code points, so code point offset 4 does not exist.
    try {
        UTF16.findOffsetFromCodePoint(str, 4);
        errln("FAIL findOffsetFromCodePoint getting a non-existent " +
              "codepoint to a string with supplementary characters");
    } catch (StringIndexOutOfBoundsException e) {
        // this is a success
        logln("Passed out of bounds codepoint offset");
    }
    if (UTF16.countCodePoint(str) != 3) {
        errln("FAIL Counting the number of codepoints in a string with " +
              "supplementary characters");
    }
}
public static void main(String[] arg)
{
try

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/test/text/Attic/UTF16Test.java,v $
* $Date: 2001/03/23 19:52:03 $
* $Revision: 1.5 $
* $Date: 2001/08/23 02:20:59 $
* $Revision: 1.6 $
*
*******************************************************************************
*/
@ -210,6 +210,59 @@ public final class UTF16Test extends TestFmwk
"position");
}
/**
 * Testing countCodePoint, findOffsetFromCodePoint and findCodePointOffset
 * on a string that holds one supplementary character (a surrogate pair)
 * between two BMP characters.
 */
public void TestUTF16CodePointOffset()
{
    // jitterbug 47
    // UTF-16 offsets 0..4 are expected to map to code point offsets
    // 0, 1, 1, 2, 3: offsets 1 and 2 are the two halves of the surrogate
    // pair and offset 4 is the string length.
    String str = "a\uD800\uDC00b";
    if (UTF16.findCodePointOffset(str, 0) != 0 ||
        UTF16.findOffsetFromCodePoint(str, 0) != 0) {
        errln("FAIL Getting the first codepoint offset to a string with " +
              "supplementary characters");
    }
    if (UTF16.findCodePointOffset(str, 1) != 1 ||
        UTF16.findOffsetFromCodePoint(str, 1) != 1) {
        errln("FAIL Getting the second codepoint offset to a string with " +
              "supplementary characters");
    }
    if (UTF16.findCodePointOffset(str, 2) != 1 ||
        UTF16.findOffsetFromCodePoint(str, 2) != 3) {
        errln("FAIL Getting the third codepoint offset to a string with " +
              "supplementary characters");
    }
    if (UTF16.findCodePointOffset(str, 3) != 2 ||
        UTF16.findOffsetFromCodePoint(str, 3) != 4) {
        errln("FAIL Getting the last codepoint offset to a string with " +
              "supplementary characters");
    }
    // the string length itself is a legal UTF-16 offset
    if (UTF16.findCodePointOffset(str, 4) != 3) {
        errln("FAIL Getting the length offset to a string with " +
              "supplementary characters");
    }
    // One past the string length has to be rejected. Only the bounds
    // exception counts as a pass; anything else propagates instead of
    // being logged as a success.
    try {
        UTF16.findCodePointOffset(str, 5);
        errln("FAIL findCodePointOffset getting a non-existent codepoint " +
              "to a string with supplementary characters");
    } catch (StringIndexOutOfBoundsException e) {
        // this is a success
        logln("Passed out of bounds codepoint offset");
    }
    // The string has 3 code points, so code point offset 4 does not exist.
    try {
        UTF16.findOffsetFromCodePoint(str, 4);
        errln("FAIL findOffsetFromCodePoint getting a non-existent " +
              "codepoint to a string with supplementary characters");
    } catch (StringIndexOutOfBoundsException e) {
        // this is a success
        logln("Passed out of bounds codepoint offset");
    }
    if (UTF16.countCodePoint(str) != 3) {
        errln("FAIL Counting the number of codepoints in a string with " +
              "supplementary characters");
    }
}
public static void main(String[] arg)
{
try

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UTF16.java,v $
* $Date: 2001/07/25 20:49:30 $
* $Revision: 1.5 $
* $Date: 2001/08/23 02:21:07 $
* $Revision: 1.6 $
*
*******************************************************************************
*/
@ -430,7 +430,7 @@ public final class UTF16
count --;
result ++;
}
if (result >= size) {
if (count != 0) {
throw new StringIndexOutOfBoundsException(offset32);
}
return result;
@ -457,7 +457,7 @@ public final class UTF16
*/
public static int findCodePointOffset(String source, int offset16)
{
if (offset16 < 0 || offset16 >= source.length()) {
if (offset16 < 0 || offset16 > source.length()) {
throw new StringIndexOutOfBoundsException(offset16);
}
@ -477,9 +477,14 @@ public final class UTF16
++ result; // count others as 1
}
}
// end of source being a supplementary character
if (offset16 == source.length()) {
return result;
}
// end of source being the less significant surrogate character
// shift result back to the start of the supplementary character
if (hadLeadSurrogate && isTrailSurrogate(source.charAt(offset16))) {
if (hadLeadSurrogate && (isTrailSurrogate(source.charAt(offset16)))) {
result --;
}
@ -593,7 +598,10 @@ public final class UTF16
*/
public static int countCodePoint(String s)
{
    // A null or empty string holds no code points; answer directly
    // without calling findCodePointOffset.
    if (s == null || s.length() == 0) {
        return 0;
    }
    // findCodePointOffset accepts the string length as an offset; the
    // code point offset it reports for that position is the number of
    // code points in the whole string.
    return findCodePointOffset(s, s.length());
}
/**
@ -603,7 +611,7 @@ public final class UTF16
* @param char32 code point
*/
public static void setCharAtCodePointOffset(StringBuffer str, int offset32,
int char32)
int char32)
{
int offset16 = findOffsetFromCodePoint(str.toString(), offset32);
setCharAt(str, offset16, char32);

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UTF16.java,v $
* $Date: 2001/07/25 20:49:30 $
* $Revision: 1.5 $
* $Date: 2001/08/23 02:21:07 $
* $Revision: 1.6 $
*
*******************************************************************************
*/
@ -430,7 +430,7 @@ public final class UTF16
count --;
result ++;
}
if (result >= size) {
if (count != 0) {
throw new StringIndexOutOfBoundsException(offset32);
}
return result;
@ -457,7 +457,7 @@ public final class UTF16
*/
public static int findCodePointOffset(String source, int offset16)
{
if (offset16 < 0 || offset16 >= source.length()) {
if (offset16 < 0 || offset16 > source.length()) {
throw new StringIndexOutOfBoundsException(offset16);
}
@ -477,9 +477,14 @@ public final class UTF16
++ result; // count others as 1
}
}
// end of source being a supplementary character
if (offset16 == source.length()) {
return result;
}
// end of source being the less significant surrogate character
// shift result back to the start of the supplementary character
if (hadLeadSurrogate && isTrailSurrogate(source.charAt(offset16))) {
if (hadLeadSurrogate && (isTrailSurrogate(source.charAt(offset16)))) {
result --;
}
@ -593,7 +598,10 @@ public final class UTF16
*/
public static int countCodePoint(String s)
{
    // A null or empty string holds no code points; answer directly
    // without calling findCodePointOffset.
    if (s == null || s.length() == 0) {
        return 0;
    }
    // findCodePointOffset accepts the string length as an offset; the
    // code point offset it reports for that position is the number of
    // code points in the whole string.
    return findCodePointOffset(s, s.length());
}
/**
@ -603,7 +611,7 @@ public final class UTF16
* @param char32 code point
*/
public static void setCharAtCodePointOffset(StringBuffer str, int offset32,
int char32)
int char32)
{
int offset16 = findOffsetFromCodePoint(str.toString(), offset32);
setCharAt(str, offset16, char32);