ICU-47
Allowed string length as an offset in findOffsetFromCodePoint, findCodePointOffset and countCodePoint. X-SVN-Rev: 5566
This commit is contained in:
parent
f9081a2e8e
commit
26d5e85247
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/lang/UTF16Test.java,v $
|
||||
* $Date: 2001/03/23 19:52:03 $
|
||||
* $Revision: 1.5 $
|
||||
* $Date: 2001/08/23 02:20:59 $
|
||||
* $Revision: 1.6 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -209,6 +209,59 @@ public final class UTF16Test extends TestFmwk
|
||||
errln("FAIL Setting non-supplementary characters at a surrogate " +
|
||||
"position");
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing countCodePoint, findOffsetFromCodePoint and findCodePointOffset
|
||||
*/
|
||||
public void TestUTF16CodePointOffset()
|
||||
{
|
||||
// jitterbug 47
|
||||
String str = "a\uD800\uDC00b";
|
||||
if (UTF16.findCodePointOffset(str, 0) != 0 ||
|
||||
UTF16.findOffsetFromCodePoint(str, 0) != 0) {
|
||||
errln("FAIL Getting the first codepoint offset to a string with " +
|
||||
"supplementary characters");
|
||||
}
|
||||
if (UTF16.findCodePointOffset(str, 1) != 1 ||
|
||||
UTF16.findOffsetFromCodePoint(str, 1) != 1) {
|
||||
errln("FAIL Getting the second codepoint offset to a string with " +
|
||||
"supplementary characters");
|
||||
}
|
||||
if (UTF16.findCodePointOffset(str, 2) != 1 ||
|
||||
UTF16.findOffsetFromCodePoint(str, 2) != 3) {
|
||||
errln("FAIL Getting the third codepoint offset to a string with " +
|
||||
"supplementary characters");
|
||||
}
|
||||
if (UTF16.findCodePointOffset(str, 3) != 2 ||
|
||||
UTF16.findOffsetFromCodePoint(str, 3) != 4) {
|
||||
errln("FAIL Getting the last codepoint offset to a string with " +
|
||||
"supplementary characters");
|
||||
}
|
||||
if (UTF16.findCodePointOffset(str, 4) != 3) {
|
||||
errln("FAIL Getting the length offset to a string with " +
|
||||
"supplementary characters");
|
||||
}
|
||||
try {
|
||||
UTF16.findCodePointOffset(str, 5);
|
||||
errln("FAIL Getting the a non-existence codepoint to a string with " +
|
||||
"supplementary characters");
|
||||
} catch (Exception e) {
|
||||
// this is a success
|
||||
logln("Passed out of bounds codepoint offset");
|
||||
}
|
||||
try {
|
||||
UTF16.findOffsetFromCodePoint(str, 4);
|
||||
errln("FAIL Getting the a non-existence codepoint to a string with " +
|
||||
"supplementary characters");
|
||||
} catch (Exception e) {
|
||||
// this is a success
|
||||
logln("Passed out of bounds codepoint offset");
|
||||
}
|
||||
if (UTF16.countCodePoint(str) != 3) {
|
||||
errln("FAIL Counting the number of codepoints in a string with " +
|
||||
"supplementary characters");
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] arg)
|
||||
{
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/test/text/Attic/UTF16Test.java,v $
|
||||
* $Date: 2001/03/23 19:52:03 $
|
||||
* $Revision: 1.5 $
|
||||
* $Date: 2001/08/23 02:20:59 $
|
||||
* $Revision: 1.6 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -209,6 +209,59 @@ public final class UTF16Test extends TestFmwk
|
||||
errln("FAIL Setting non-supplementary characters at a surrogate " +
|
||||
"position");
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing countCodePoint, findOffsetFromCodePoint and findCodePointOffset
|
||||
*/
|
||||
public void TestUTF16CodePointOffset()
|
||||
{
|
||||
// jitterbug 47
|
||||
String str = "a\uD800\uDC00b";
|
||||
if (UTF16.findCodePointOffset(str, 0) != 0 ||
|
||||
UTF16.findOffsetFromCodePoint(str, 0) != 0) {
|
||||
errln("FAIL Getting the first codepoint offset to a string with " +
|
||||
"supplementary characters");
|
||||
}
|
||||
if (UTF16.findCodePointOffset(str, 1) != 1 ||
|
||||
UTF16.findOffsetFromCodePoint(str, 1) != 1) {
|
||||
errln("FAIL Getting the second codepoint offset to a string with " +
|
||||
"supplementary characters");
|
||||
}
|
||||
if (UTF16.findCodePointOffset(str, 2) != 1 ||
|
||||
UTF16.findOffsetFromCodePoint(str, 2) != 3) {
|
||||
errln("FAIL Getting the third codepoint offset to a string with " +
|
||||
"supplementary characters");
|
||||
}
|
||||
if (UTF16.findCodePointOffset(str, 3) != 2 ||
|
||||
UTF16.findOffsetFromCodePoint(str, 3) != 4) {
|
||||
errln("FAIL Getting the last codepoint offset to a string with " +
|
||||
"supplementary characters");
|
||||
}
|
||||
if (UTF16.findCodePointOffset(str, 4) != 3) {
|
||||
errln("FAIL Getting the length offset to a string with " +
|
||||
"supplementary characters");
|
||||
}
|
||||
try {
|
||||
UTF16.findCodePointOffset(str, 5);
|
||||
errln("FAIL Getting the a non-existence codepoint to a string with " +
|
||||
"supplementary characters");
|
||||
} catch (Exception e) {
|
||||
// this is a success
|
||||
logln("Passed out of bounds codepoint offset");
|
||||
}
|
||||
try {
|
||||
UTF16.findOffsetFromCodePoint(str, 4);
|
||||
errln("FAIL Getting the a non-existence codepoint to a string with " +
|
||||
"supplementary characters");
|
||||
} catch (Exception e) {
|
||||
// this is a success
|
||||
logln("Passed out of bounds codepoint offset");
|
||||
}
|
||||
if (UTF16.countCodePoint(str) != 3) {
|
||||
errln("FAIL Counting the number of codepoints in a string with " +
|
||||
"supplementary characters");
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] arg)
|
||||
{
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UTF16.java,v $
|
||||
* $Date: 2001/07/25 20:49:30 $
|
||||
* $Revision: 1.5 $
|
||||
* $Date: 2001/08/23 02:21:07 $
|
||||
* $Revision: 1.6 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -430,7 +430,7 @@ public final class UTF16
|
||||
count --;
|
||||
result ++;
|
||||
}
|
||||
if (result >= size) {
|
||||
if (count != 0) {
|
||||
throw new StringIndexOutOfBoundsException(offset32);
|
||||
}
|
||||
return result;
|
||||
@ -457,7 +457,7 @@ public final class UTF16
|
||||
*/
|
||||
public static int findCodePointOffset(String source, int offset16)
|
||||
{
|
||||
if (offset16 < 0 || offset16 >= source.length()) {
|
||||
if (offset16 < 0 || offset16 > source.length()) {
|
||||
throw new StringIndexOutOfBoundsException(offset16);
|
||||
}
|
||||
|
||||
@ -477,9 +477,14 @@ public final class UTF16
|
||||
++ result; // count others as 1
|
||||
}
|
||||
}
|
||||
// end of source being a supplementary character
|
||||
|
||||
if (offset16 == source.length()) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// end of source being the less significant surrogate character
|
||||
// shift result back to the start of the supplementary character
|
||||
if (hadLeadSurrogate && isTrailSurrogate(source.charAt(offset16))) {
|
||||
if (hadLeadSurrogate && (isTrailSurrogate(source.charAt(offset16)))) {
|
||||
result --;
|
||||
}
|
||||
|
||||
@ -593,7 +598,10 @@ public final class UTF16
|
||||
*/
|
||||
public static int countCodePoint(String s)
|
||||
{
|
||||
return findCodePointOffset(s, s.length() - 1) + 1;
|
||||
if (s == null || s.length() == 0) {
|
||||
return 0;
|
||||
}
|
||||
return findCodePointOffset(s, s.length());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -603,7 +611,7 @@ public final class UTF16
|
||||
* @param char32 code point
|
||||
*/
|
||||
public static void setCharAtCodePointOffset(StringBuffer str, int offset32,
|
||||
int char32)
|
||||
int char32)
|
||||
{
|
||||
int offset16 = findOffsetFromCodePoint(str.toString(), offset32);
|
||||
setCharAt(str, offset16, char32);
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UTF16.java,v $
|
||||
* $Date: 2001/07/25 20:49:30 $
|
||||
* $Revision: 1.5 $
|
||||
* $Date: 2001/08/23 02:21:07 $
|
||||
* $Revision: 1.6 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -430,7 +430,7 @@ public final class UTF16
|
||||
count --;
|
||||
result ++;
|
||||
}
|
||||
if (result >= size) {
|
||||
if (count != 0) {
|
||||
throw new StringIndexOutOfBoundsException(offset32);
|
||||
}
|
||||
return result;
|
||||
@ -457,7 +457,7 @@ public final class UTF16
|
||||
*/
|
||||
public static int findCodePointOffset(String source, int offset16)
|
||||
{
|
||||
if (offset16 < 0 || offset16 >= source.length()) {
|
||||
if (offset16 < 0 || offset16 > source.length()) {
|
||||
throw new StringIndexOutOfBoundsException(offset16);
|
||||
}
|
||||
|
||||
@ -477,9 +477,14 @@ public final class UTF16
|
||||
++ result; // count others as 1
|
||||
}
|
||||
}
|
||||
// end of source being a supplementary character
|
||||
|
||||
if (offset16 == source.length()) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// end of source being the less significant surrogate character
|
||||
// shift result back to the start of the supplementary character
|
||||
if (hadLeadSurrogate && isTrailSurrogate(source.charAt(offset16))) {
|
||||
if (hadLeadSurrogate && (isTrailSurrogate(source.charAt(offset16)))) {
|
||||
result --;
|
||||
}
|
||||
|
||||
@ -593,7 +598,10 @@ public final class UTF16
|
||||
*/
|
||||
public static int countCodePoint(String s)
|
||||
{
|
||||
return findCodePointOffset(s, s.length() - 1) + 1;
|
||||
if (s == null || s.length() == 0) {
|
||||
return 0;
|
||||
}
|
||||
return findCodePointOffset(s, s.length());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -603,7 +611,7 @@ public final class UTF16
|
||||
* @param char32 code point
|
||||
*/
|
||||
public static void setCharAtCodePointOffset(StringBuffer str, int offset32,
|
||||
int char32)
|
||||
int char32)
|
||||
{
|
||||
int offset16 = findOffsetFromCodePoint(str.toString(), offset32);
|
||||
setCharAt(str, offset16, char32);
|
||||
|
Loading…
Reference in New Issue
Block a user