Add "Remove" transliterator

X-SVN-Rev: 4442
This commit is contained in:
Alan Liu 2001-04-04 18:07:08 +00:00
parent 73d42ad240
commit 9f8d255013
6 changed files with 194 additions and 12 deletions

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
* $Date: 2001/04/03 18:21:57 $
* $Revision: 1.33 $
* $Date: 2001/04/04 18:07:08 $
* $Revision: 1.34 $
*
*****************************************************************************************
*/
@ -742,6 +742,11 @@ public class TransliteratorTest extends TestFmwk {
"Null[abc]",
"xyz",
"xyz",
"Remove[abc]",
"Remove[abc]",
"axbycz",
"xyz",
};
for (int i=0; i<DATA.length; i+=4) {
@ -767,6 +772,15 @@ public class TransliteratorTest extends TestFmwk {
}
}
/**
 * Checks the "Remove" transliterator when it is restricted by the
 * filter <code>[aeiou]</code>: the lowercase vowels of the input are
 * expected to disappear while all other characters are kept.
 */
public void TestRemove() {
    final String input = "The quick brown fox.";
    final String expected = "Th qck brwn fx.";
    Transliterator remover = Transliterator.getInstance("Remove[aeiou]");
    expect(remover, input, expected);
}
//======================================================================
// Support methods
//======================================================================

View File

@ -0,0 +1,74 @@
/*
*******************************************************************************
* Copyright (C) 1996-2000, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/RemoveTransliterator.java,v $
* $Date: 2001/04/04 18:06:53 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.text;
import java.util.*;
/**
 * A transliterator that deletes text instead of converting it. It is
 * meant to be paired with a filter: characters that
 * <code>filteredCharAt</code> reports as U+FFFE are left in place, and
 * every other character in the range being processed is removed.
 */
public class RemoveTransliterator extends Transliterator {
    /**
     * ID under which this transliterator is known. Package visible so
     * that other classes in this package can refer to it.
     */
    static String _ID = "Remove";

    /**
     * Creates a remove transliterator with the ID {@link #_ID}; the
     * second superclass constructor argument is passed as
     * <code>null</code>.
     */
    public RemoveTransliterator() {
        super(_ID, null);
    }

    /**
     * Implements {@link Transliterator#handleTransliterate}.
     *
     * Walks the range <code>[offsets.start, offsets.limit)</code> and
     * deletes each maximal run of characters that are not reported as
     * U+FFFE by <code>filteredCharAt</code>. After every deletion
     * <code>offsets.limit</code> and <code>offsets.contextLimit</code>
     * are reduced by the number of characters removed. On return
     * <code>offsets.start</code> has been advanced to the (adjusted)
     * <code>offsets.limit</code>.
     *
     * @param text the text to modify in place
     * @param offsets the range to process; updated as described above
     * @param incremental not consulted by this implementation
     */
    protected void handleTransliterate(Replaceable text,
                                       Position offsets, boolean incremental) {
        // Value that marks a character this transliterator must not touch.
        final char FILTERED = '\uFFFE';

        int cursor = offsets.start;
        while (true) {
            // Step over characters that have to stay.
            for (; cursor < offsets.limit; ++cursor) {
                if (filteredCharAt(text, cursor) != FILTERED) {
                    break;
                }
            }
            if (cursor >= offsets.limit) {
                break;
            }

            // The character at cursor is known to be removable, so the
            // search for the end of the run begins one position later.
            int runEnd = cursor + 1;
            for (; runEnd < offsets.limit; ++runEnd) {
                if (filteredCharAt(text, runEnd) == FILTERED) {
                    break;
                }
            }

            // Delete [cursor, runEnd) and shrink the limits to match.
            text.replace(cursor, runEnd, "");
            int removed = runEnd - cursor;
            offsets.contextLimit -= removed;
            offsets.limit -= removed;
        }
        offsets.start = cursor;
    }
}

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $
* $Date: 2001/03/31 01:31:13 $
* $Revision: 1.30 $
* $Date: 2001/04/04 18:06:25 $
* $Revision: 1.31 $
*
*****************************************************************************************
*/
@ -240,7 +240,7 @@ import com.ibm.text.resources.ResourceReader;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.30 $ $Date: 2001/03/31 01:31:13 $
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.31 $ $Date: 2001/04/04 18:06:25 $
*/
public abstract class Transliterator {
/**
@ -794,7 +794,8 @@ public abstract class Transliterator {
if (direction == REVERSE) {
int i = id.indexOf('-');
if (i < 0) {
if (!id.equals(NullTransliterator._ID)) {
if (!id.equals(NullTransliterator._ID) &&
!id.equals(RemoveTransliterator._ID)) {
throw new IllegalArgumentException("No inverse for: "
+ id);
}
@ -1083,5 +1084,7 @@ public abstract class Transliterator {
UnicodeToHexTransliterator.class, null);
registerClass(NullTransliterator._ID,
NullTransliterator.class, null);
registerClass(RemoveTransliterator._ID,
RemoveTransliterator.class, null);
}
}

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
* $Date: 2001/04/03 18:21:57 $
* $Revision: 1.33 $
* $Date: 2001/04/04 18:07:08 $
* $Revision: 1.34 $
*
*****************************************************************************************
*/
@ -742,6 +742,11 @@ public class TransliteratorTest extends TestFmwk {
"Null[abc]",
"xyz",
"xyz",
"Remove[abc]",
"Remove[abc]",
"axbycz",
"xyz",
};
for (int i=0; i<DATA.length; i+=4) {
@ -767,6 +772,15 @@ public class TransliteratorTest extends TestFmwk {
}
}
/**
 * Checks the "Remove" transliterator when it is restricted by the
 * filter <code>[aeiou]</code>: the lowercase vowels of the input are
 * expected to disappear while all other characters are kept.
 */
public void TestRemove() {
    final String input = "The quick brown fox.";
    final String expected = "Th qck brwn fx.";
    Transliterator remover = Transliterator.getInstance("Remove[aeiou]");
    expect(remover, input, expected);
}
//======================================================================
// Support methods
//======================================================================

View File

@ -0,0 +1,74 @@
/*
*******************************************************************************
* Copyright (C) 1996-2000, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/RemoveTransliterator.java,v $
* $Date: 2001/04/04 18:06:53 $
* $Revision: 1.1 $
*
*****************************************************************************************
*/
package com.ibm.text;
import java.util.*;
/**
 * A transliterator that deletes text instead of converting it. It is
 * meant to be paired with a filter: characters that
 * <code>filteredCharAt</code> reports as U+FFFE are left in place, and
 * every other character in the range being processed is removed.
 */
public class RemoveTransliterator extends Transliterator {
    /**
     * ID under which this transliterator is known. Package visible so
     * that other classes in this package can refer to it.
     */
    static String _ID = "Remove";

    /**
     * Creates a remove transliterator with the ID {@link #_ID}; the
     * second superclass constructor argument is passed as
     * <code>null</code>.
     */
    public RemoveTransliterator() {
        super(_ID, null);
    }

    /**
     * Implements {@link Transliterator#handleTransliterate}.
     *
     * Walks the range <code>[offsets.start, offsets.limit)</code> and
     * deletes each maximal run of characters that are not reported as
     * U+FFFE by <code>filteredCharAt</code>. After every deletion
     * <code>offsets.limit</code> and <code>offsets.contextLimit</code>
     * are reduced by the number of characters removed. On return
     * <code>offsets.start</code> has been advanced to the (adjusted)
     * <code>offsets.limit</code>.
     *
     * @param text the text to modify in place
     * @param offsets the range to process; updated as described above
     * @param incremental not consulted by this implementation
     */
    protected void handleTransliterate(Replaceable text,
                                       Position offsets, boolean incremental) {
        // Value that marks a character this transliterator must not touch.
        final char FILTERED = '\uFFFE';

        int cursor = offsets.start;
        while (true) {
            // Step over characters that have to stay.
            for (; cursor < offsets.limit; ++cursor) {
                if (filteredCharAt(text, cursor) != FILTERED) {
                    break;
                }
            }
            if (cursor >= offsets.limit) {
                break;
            }

            // The character at cursor is known to be removable, so the
            // search for the end of the run begins one position later.
            int runEnd = cursor + 1;
            for (; runEnd < offsets.limit; ++runEnd) {
                if (filteredCharAt(text, runEnd) == FILTERED) {
                    break;
                }
            }

            // Delete [cursor, runEnd) and shrink the limits to match.
            text.replace(cursor, runEnd, "");
            int removed = runEnd - cursor;
            offsets.contextLimit -= removed;
            offsets.limit -= removed;
        }
        offsets.start = cursor;
    }
}

View File

@ -5,8 +5,8 @@
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/Transliterator.java,v $
* $Date: 2001/03/31 01:31:13 $
* $Revision: 1.30 $
* $Date: 2001/04/04 18:06:25 $
* $Revision: 1.31 $
*
*****************************************************************************************
*/
@ -240,7 +240,7 @@ import com.ibm.text.resources.ResourceReader;
* <p>Copyright &copy; IBM Corporation 1999. All rights reserved.
*
* @author Alan Liu
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.30 $ $Date: 2001/03/31 01:31:13 $
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.31 $ $Date: 2001/04/04 18:06:25 $
*/
public abstract class Transliterator {
/**
@ -794,7 +794,8 @@ public abstract class Transliterator {
if (direction == REVERSE) {
int i = id.indexOf('-');
if (i < 0) {
if (!id.equals(NullTransliterator._ID)) {
if (!id.equals(NullTransliterator._ID) &&
!id.equals(RemoveTransliterator._ID)) {
throw new IllegalArgumentException("No inverse for: "
+ id);
}
@ -1083,5 +1084,7 @@ public abstract class Transliterator {
UnicodeToHexTransliterator.class, null);
registerClass(NullTransliterator._ID,
NullTransliterator.class, null);
registerClass(RemoveTransliterator._ID,
RemoveTransliterator.class, null);
}
}