Port Any-Name and Name-Any from icu4c
X-SVN-Rev: 5155
This commit is contained in:
parent
a22ea7d8af
commit
db3c833b55
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/dev/test/translit/TransliteratorTest.java,v $
|
||||||
* $Date: 2001/07/02 19:44:10 $
|
* $Date: 2001/07/02 20:54:51 $
|
||||||
* $Revision: 1.38 $
|
* $Revision: 1.39 $
|
||||||
*
|
*
|
||||||
*****************************************************************************************
|
*****************************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -795,6 +795,26 @@ public class TransliteratorTest extends TestFmwk {
|
|||||||
"The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
|
"The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the name mapping transliterators.
|
||||||
|
*/
|
||||||
|
public void TestNameMap() {
|
||||||
|
Transliterator uni2name =
|
||||||
|
Transliterator.getInstance("Any-Name[^abc]");
|
||||||
|
Transliterator name2uni =
|
||||||
|
Transliterator.getInstance("Name-Any");
|
||||||
|
|
||||||
|
/// NOTE NOTE NOTE NOTE NOTE NOTE NOTE
|
||||||
|
// The results in icu4j and icu4c are different:
|
||||||
|
// icu4c: CJK UNIFIED IDEOGRAPH-4E01
|
||||||
|
// icu4j: CJK UNIFIED IDEOGRAPH-4e01
|
||||||
|
|
||||||
|
expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\uFFFF",
|
||||||
|
"{NO-BREAK SPACE}abc{CJK UNIFIED IDEOGRAPH-4e01}{MICRO SIGN}{GUJARATI SIGN CANDRABINDU}{REPLACEMENT CHARACTER}\uFFFF");
|
||||||
|
expect(name2uni, "{ NO-BREAK SPACE}abc{ CJK UNIFIED IDEOGRAPH-4E01 }{x{MICRO SIGN}{GUJARATI SIGN CANDRABINDU}{REPLACEMENT CHARACTER}{",
|
||||||
|
"\u00A0abc\u4E01{x\u00B5\u0A81\uFFFD{");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test the normalization transliterator.
|
* Test the normalization transliterator.
|
||||||
*/
|
*/
|
||||||
|
161
icu4j/src/com/ibm/icu/text/NameUnicodeTransliterator.java
Executable file
161
icu4j/src/com/ibm/icu/text/NameUnicodeTransliterator.java
Executable file
@ -0,0 +1,161 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 1996-2001, International Business Machines Corporation and
|
||||||
|
* others. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/NameUnicodeTransliterator.java,v $
|
||||||
|
* $Date: 2001/07/02 20:55:29 $
|
||||||
|
* $Revision: 1.1 $
|
||||||
|
*/
|
||||||
|
package com.ibm.text;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A transliterator that performs name to character mapping.
|
||||||
|
* @author Alan Liu
|
||||||
|
*/
|
||||||
|
public class NameUnicodeTransliterator extends Transliterator {
|
||||||
|
|
||||||
|
char openDelimiter;
|
||||||
|
char closeDelimiter;
|
||||||
|
|
||||||
|
static final String _ID = "Name-Any";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* System registration hook.
|
||||||
|
*/
|
||||||
|
static void register() {
|
||||||
|
Transliterator.registerFactory(_ID, new Transliterator.Factory() {
|
||||||
|
public Transliterator getInstance() {
|
||||||
|
return new NameUnicodeTransliterator(null);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a transliterator.
|
||||||
|
*/
|
||||||
|
public NameUnicodeTransliterator(char openDelimiter, char closeDelimiter,
|
||||||
|
UnicodeFilter filter) {
|
||||||
|
super(_ID, filter);
|
||||||
|
this.openDelimiter = openDelimiter;
|
||||||
|
this.closeDelimiter = closeDelimiter;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a transliterator with the default delimiters '{' and
|
||||||
|
* '}'.
|
||||||
|
*/
|
||||||
|
public NameUnicodeTransliterator(UnicodeFilter filter) {
|
||||||
|
this('{', '}', filter);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implements {@link Transliterator#handleTransliterate}.
|
||||||
|
*/
|
||||||
|
protected void handleTransliterate(Replaceable text,
|
||||||
|
Position offsets, boolean isIncremental) {
|
||||||
|
// Longest name as of 3.0.0 is 83
|
||||||
|
final int LONGEST_NAME = 83;
|
||||||
|
|
||||||
|
// Accomodate the longest possible name plus padding
|
||||||
|
char[] buf = new char[LONGEST_NAME + 8];
|
||||||
|
|
||||||
|
// The only characters used in names are (as of Unicode 3.0.0):
|
||||||
|
// -0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||||
|
// (first character is a space).
|
||||||
|
|
||||||
|
int cursor = offsets.start;
|
||||||
|
int limit = offsets.limit;
|
||||||
|
|
||||||
|
// Modes:
|
||||||
|
// 0 - looking for open delimiter
|
||||||
|
// 1 - after open delimiter
|
||||||
|
int mode = 0;
|
||||||
|
int ibuf = 0;
|
||||||
|
int openPos = offsets.start; // position of openDelimiter
|
||||||
|
|
||||||
|
for (; cursor < limit; ++cursor) {
|
||||||
|
char c = filteredCharAt(text, cursor);
|
||||||
|
|
||||||
|
switch (mode) {
|
||||||
|
case 0: // looking for open delimiter
|
||||||
|
if (c == openDelimiter) {
|
||||||
|
openPos = cursor;
|
||||||
|
mode = 1;
|
||||||
|
ibuf = 0;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1: // after open delimiter
|
||||||
|
// Look for [-a-zA-Z0-9]. If \w+ is found, convert it
|
||||||
|
// to a single space. If closeDelimiter is found, exit
|
||||||
|
// the loop. If any other character is found, exit the
|
||||||
|
// loop. If the limit is found, exit the loop.
|
||||||
|
if (UCharacter.isWhitespace(c)) {
|
||||||
|
// Ignore leading whitespace
|
||||||
|
if (ibuf != 0 && buf[ibuf-1] != (char)0x0020) {
|
||||||
|
buf[ibuf++] = (char)0x0020 /* */;
|
||||||
|
// If we go a bit past the longest possible name then abort
|
||||||
|
if (ibuf == (LONGEST_NAME + 4)) {
|
||||||
|
mode = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c == closeDelimiter) {
|
||||||
|
// Delete trailing space, if any
|
||||||
|
if (ibuf > 0 && buf[ibuf-1] == (char)0x0020) {
|
||||||
|
--ibuf;
|
||||||
|
}
|
||||||
|
int ch = UCharacter.getCharFromName(new String(buf, 0, ibuf));
|
||||||
|
if (ch != -1) {
|
||||||
|
// Lookup succeeded
|
||||||
|
text.replace(openPos, cursor+1, String.valueOf((char) ch));
|
||||||
|
|
||||||
|
// Adjust indices for the change in the length of
|
||||||
|
// the string. Do not assume that str.length() ==
|
||||||
|
// 1, in case of surrogates.
|
||||||
|
int delta = cursor + 1 - openPos - 1/*str.length()*/;
|
||||||
|
cursor -= delta;
|
||||||
|
limit -= delta;
|
||||||
|
// assert(cursor == openPos + str.length());
|
||||||
|
}
|
||||||
|
// If the lookup failed, we leave things as-is and
|
||||||
|
// still switch to mode 0 and continue.
|
||||||
|
mode = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c >= (char)0x0061 && c <= (char)0x007A) {
|
||||||
|
c -= 0x0020; // [a-z] => [A-Z]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if c =~ [-A-Z0-9]
|
||||||
|
if (c == (char)0x002D ||
|
||||||
|
(c >= (char)0x0041 && c <= (char)0x005A) ||
|
||||||
|
(c >= (char)0x0030 && c <= (char)0x0039)) {
|
||||||
|
buf[ibuf++] = (char) c;
|
||||||
|
// If we go a bit past the longest possible name then abort
|
||||||
|
if (ibuf == (LONGEST_NAME + 4)) {
|
||||||
|
mode = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Invalid character
|
||||||
|
else {
|
||||||
|
--cursor; // Backup and reprocess this character
|
||||||
|
mode = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
offsets.contextLimit += limit - offsets.limit;
|
||||||
|
offsets.limit = limit;
|
||||||
|
// In incremental mode, only advance the cursor up to the last
|
||||||
|
// open delimiter, if we are in mode 1.
|
||||||
|
offsets.start = (mode == 1 && isIncremental) ? openPos : cursor;
|
||||||
|
}
|
||||||
|
}
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/Transliterator.java,v $
|
||||||
* $Date: 2001/06/29 22:50:25 $
|
* $Date: 2001/07/02 20:55:29 $
|
||||||
* $Revision: 1.35 $
|
* $Revision: 1.36 $
|
||||||
*
|
*
|
||||||
*****************************************************************************************
|
*****************************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -240,7 +240,7 @@ import com.ibm.text.resources.ResourceReader;
|
|||||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||||
*
|
*
|
||||||
* @author Alan Liu
|
* @author Alan Liu
|
||||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.35 $ $Date: 2001/06/29 22:50:25 $
|
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.36 $ $Date: 2001/07/02 20:55:29 $
|
||||||
*/
|
*/
|
||||||
public abstract class Transliterator {
|
public abstract class Transliterator {
|
||||||
/**
|
/**
|
||||||
@ -1100,6 +1100,8 @@ public abstract class Transliterator {
|
|||||||
LowercaseTransliterator.register();
|
LowercaseTransliterator.register();
|
||||||
UppercaseTransliterator.register();
|
UppercaseTransliterator.register();
|
||||||
TitlecaseTransliterator.register();
|
TitlecaseTransliterator.register();
|
||||||
|
UnicodeNameTransliterator.register();
|
||||||
|
NameUnicodeTransliterator.register();
|
||||||
NormalizationTransliterator.register();
|
NormalizationTransliterator.register();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
87
icu4j/src/com/ibm/icu/text/UnicodeNameTransliterator.java
Executable file
87
icu4j/src/com/ibm/icu/text/UnicodeNameTransliterator.java
Executable file
@ -0,0 +1,87 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 1996-2001, International Business Machines Corporation and
|
||||||
|
* others. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/UnicodeNameTransliterator.java,v $
|
||||||
|
* $Date: 2001/07/02 20:55:29 $
|
||||||
|
* $Revision: 1.1 $
|
||||||
|
*/
|
||||||
|
package com.ibm.text;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A transliterator that performs character to name mapping.
|
||||||
|
* @author Alan Liu
|
||||||
|
*/
|
||||||
|
public class UnicodeNameTransliterator extends Transliterator {
|
||||||
|
|
||||||
|
char openDelimiter;
|
||||||
|
char closeDelimiter;
|
||||||
|
|
||||||
|
static final String _ID = "Any-Name";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* System registration hook.
|
||||||
|
*/
|
||||||
|
static void register() {
|
||||||
|
Transliterator.registerFactory(_ID, new Transliterator.Factory() {
|
||||||
|
public Transliterator getInstance() {
|
||||||
|
return new UnicodeNameTransliterator(null);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a transliterator.
|
||||||
|
*/
|
||||||
|
public UnicodeNameTransliterator(char openDelimiter, char closeDelimiter,
|
||||||
|
UnicodeFilter filter) {
|
||||||
|
super(_ID, filter);
|
||||||
|
this.openDelimiter = openDelimiter;
|
||||||
|
this.closeDelimiter = closeDelimiter;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a transliterator with the default delimiters '{' and
|
||||||
|
* '}'.
|
||||||
|
*/
|
||||||
|
public UnicodeNameTransliterator(UnicodeFilter filter) {
|
||||||
|
this('{', '}', filter);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implements {@link Transliterator#handleTransliterate}.
|
||||||
|
*/
|
||||||
|
protected void handleTransliterate(Replaceable text,
|
||||||
|
Position offsets, boolean isIncremental) {
|
||||||
|
int cursor = offsets.start;
|
||||||
|
int limit = offsets.limit;
|
||||||
|
|
||||||
|
UnicodeFilter filt = getFilter();
|
||||||
|
StringBuffer str = new StringBuffer();
|
||||||
|
str.append(openDelimiter);
|
||||||
|
int len;
|
||||||
|
String name;
|
||||||
|
|
||||||
|
while (cursor < limit) {
|
||||||
|
char c = text.charAt(cursor);
|
||||||
|
if ((filt == null || filt.contains(c)) &&
|
||||||
|
(name=UCharacter.getName(c)) != null) {
|
||||||
|
|
||||||
|
str.setLength(1);
|
||||||
|
str.append(name).append(closeDelimiter);
|
||||||
|
|
||||||
|
text.replace(cursor, cursor+1, str.toString());
|
||||||
|
len = str.length();
|
||||||
|
cursor += len; // advance cursor by 1 and adjust for new text
|
||||||
|
limit += len-1; // change in length is (len - 1)
|
||||||
|
} else {
|
||||||
|
++cursor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
offsets.contextLimit += limit - offsets.limit;
|
||||||
|
offsets.limit = limit;
|
||||||
|
offsets.start = cursor;
|
||||||
|
}
|
||||||
|
}
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/test/translit/Attic/TransliteratorTest.java,v $
|
||||||
* $Date: 2001/07/02 19:44:10 $
|
* $Date: 2001/07/02 20:54:51 $
|
||||||
* $Revision: 1.38 $
|
* $Revision: 1.39 $
|
||||||
*
|
*
|
||||||
*****************************************************************************************
|
*****************************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -795,6 +795,26 @@ public class TransliteratorTest extends TestFmwk {
|
|||||||
"The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
|
"The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the name mapping transliterators.
|
||||||
|
*/
|
||||||
|
public void TestNameMap() {
|
||||||
|
Transliterator uni2name =
|
||||||
|
Transliterator.getInstance("Any-Name[^abc]");
|
||||||
|
Transliterator name2uni =
|
||||||
|
Transliterator.getInstance("Name-Any");
|
||||||
|
|
||||||
|
/// NOTE NOTE NOTE NOTE NOTE NOTE NOTE
|
||||||
|
// The results in icu4j and icu4c are different:
|
||||||
|
// icu4c: CJK UNIFIED IDEOGRAPH-4E01
|
||||||
|
// icu4j: CJK UNIFIED IDEOGRAPH-4e01
|
||||||
|
|
||||||
|
expect(uni2name, "\u00A0abc\u4E01\u00B5\u0A81\uFFFD\uFFFF",
|
||||||
|
"{NO-BREAK SPACE}abc{CJK UNIFIED IDEOGRAPH-4e01}{MICRO SIGN}{GUJARATI SIGN CANDRABINDU}{REPLACEMENT CHARACTER}\uFFFF");
|
||||||
|
expect(name2uni, "{ NO-BREAK SPACE}abc{ CJK UNIFIED IDEOGRAPH-4E01 }{x{MICRO SIGN}{GUJARATI SIGN CANDRABINDU}{REPLACEMENT CHARACTER}{",
|
||||||
|
"\u00A0abc\u4E01{x\u00B5\u0A81\uFFFD{");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test the normalization transliterator.
|
* Test the normalization transliterator.
|
||||||
*/
|
*/
|
||||||
|
161
icu4j/src/com/ibm/text/NameUnicodeTransliterator.java
Executable file
161
icu4j/src/com/ibm/text/NameUnicodeTransliterator.java
Executable file
@ -0,0 +1,161 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 1996-2001, International Business Machines Corporation and
|
||||||
|
* others. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/NameUnicodeTransliterator.java,v $
|
||||||
|
* $Date: 2001/07/02 20:55:29 $
|
||||||
|
* $Revision: 1.1 $
|
||||||
|
*/
|
||||||
|
package com.ibm.text;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A transliterator that performs name to character mapping.
|
||||||
|
* @author Alan Liu
|
||||||
|
*/
|
||||||
|
public class NameUnicodeTransliterator extends Transliterator {
|
||||||
|
|
||||||
|
char openDelimiter;
|
||||||
|
char closeDelimiter;
|
||||||
|
|
||||||
|
static final String _ID = "Name-Any";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* System registration hook.
|
||||||
|
*/
|
||||||
|
static void register() {
|
||||||
|
Transliterator.registerFactory(_ID, new Transliterator.Factory() {
|
||||||
|
public Transliterator getInstance() {
|
||||||
|
return new NameUnicodeTransliterator(null);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a transliterator.
|
||||||
|
*/
|
||||||
|
public NameUnicodeTransliterator(char openDelimiter, char closeDelimiter,
|
||||||
|
UnicodeFilter filter) {
|
||||||
|
super(_ID, filter);
|
||||||
|
this.openDelimiter = openDelimiter;
|
||||||
|
this.closeDelimiter = closeDelimiter;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a transliterator with the default delimiters '{' and
|
||||||
|
* '}'.
|
||||||
|
*/
|
||||||
|
public NameUnicodeTransliterator(UnicodeFilter filter) {
|
||||||
|
this('{', '}', filter);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implements {@link Transliterator#handleTransliterate}.
|
||||||
|
*/
|
||||||
|
protected void handleTransliterate(Replaceable text,
|
||||||
|
Position offsets, boolean isIncremental) {
|
||||||
|
// Longest name as of 3.0.0 is 83
|
||||||
|
final int LONGEST_NAME = 83;
|
||||||
|
|
||||||
|
// Accomodate the longest possible name plus padding
|
||||||
|
char[] buf = new char[LONGEST_NAME + 8];
|
||||||
|
|
||||||
|
// The only characters used in names are (as of Unicode 3.0.0):
|
||||||
|
// -0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ
|
||||||
|
// (first character is a space).
|
||||||
|
|
||||||
|
int cursor = offsets.start;
|
||||||
|
int limit = offsets.limit;
|
||||||
|
|
||||||
|
// Modes:
|
||||||
|
// 0 - looking for open delimiter
|
||||||
|
// 1 - after open delimiter
|
||||||
|
int mode = 0;
|
||||||
|
int ibuf = 0;
|
||||||
|
int openPos = offsets.start; // position of openDelimiter
|
||||||
|
|
||||||
|
for (; cursor < limit; ++cursor) {
|
||||||
|
char c = filteredCharAt(text, cursor);
|
||||||
|
|
||||||
|
switch (mode) {
|
||||||
|
case 0: // looking for open delimiter
|
||||||
|
if (c == openDelimiter) {
|
||||||
|
openPos = cursor;
|
||||||
|
mode = 1;
|
||||||
|
ibuf = 0;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 1: // after open delimiter
|
||||||
|
// Look for [-a-zA-Z0-9]. If \w+ is found, convert it
|
||||||
|
// to a single space. If closeDelimiter is found, exit
|
||||||
|
// the loop. If any other character is found, exit the
|
||||||
|
// loop. If the limit is found, exit the loop.
|
||||||
|
if (UCharacter.isWhitespace(c)) {
|
||||||
|
// Ignore leading whitespace
|
||||||
|
if (ibuf != 0 && buf[ibuf-1] != (char)0x0020) {
|
||||||
|
buf[ibuf++] = (char)0x0020 /* */;
|
||||||
|
// If we go a bit past the longest possible name then abort
|
||||||
|
if (ibuf == (LONGEST_NAME + 4)) {
|
||||||
|
mode = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c == closeDelimiter) {
|
||||||
|
// Delete trailing space, if any
|
||||||
|
if (ibuf > 0 && buf[ibuf-1] == (char)0x0020) {
|
||||||
|
--ibuf;
|
||||||
|
}
|
||||||
|
int ch = UCharacter.getCharFromName(new String(buf, 0, ibuf));
|
||||||
|
if (ch != -1) {
|
||||||
|
// Lookup succeeded
|
||||||
|
text.replace(openPos, cursor+1, String.valueOf((char) ch));
|
||||||
|
|
||||||
|
// Adjust indices for the change in the length of
|
||||||
|
// the string. Do not assume that str.length() ==
|
||||||
|
// 1, in case of surrogates.
|
||||||
|
int delta = cursor + 1 - openPos - 1/*str.length()*/;
|
||||||
|
cursor -= delta;
|
||||||
|
limit -= delta;
|
||||||
|
// assert(cursor == openPos + str.length());
|
||||||
|
}
|
||||||
|
// If the lookup failed, we leave things as-is and
|
||||||
|
// still switch to mode 0 and continue.
|
||||||
|
mode = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c >= (char)0x0061 && c <= (char)0x007A) {
|
||||||
|
c -= 0x0020; // [a-z] => [A-Z]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if c =~ [-A-Z0-9]
|
||||||
|
if (c == (char)0x002D ||
|
||||||
|
(c >= (char)0x0041 && c <= (char)0x005A) ||
|
||||||
|
(c >= (char)0x0030 && c <= (char)0x0039)) {
|
||||||
|
buf[ibuf++] = (char) c;
|
||||||
|
// If we go a bit past the longest possible name then abort
|
||||||
|
if (ibuf == (LONGEST_NAME + 4)) {
|
||||||
|
mode = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Invalid character
|
||||||
|
else {
|
||||||
|
--cursor; // Backup and reprocess this character
|
||||||
|
mode = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
offsets.contextLimit += limit - offsets.limit;
|
||||||
|
offsets.limit = limit;
|
||||||
|
// In incremental mode, only advance the cursor up to the last
|
||||||
|
// open delimiter, if we are in mode 1.
|
||||||
|
offsets.start = (mode == 1 && isIncremental) ? openPos : cursor;
|
||||||
|
}
|
||||||
|
}
|
@ -5,8 +5,8 @@
|
|||||||
*******************************************************************************
|
*******************************************************************************
|
||||||
*
|
*
|
||||||
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/Transliterator.java,v $
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/Transliterator.java,v $
|
||||||
* $Date: 2001/06/29 22:50:25 $
|
* $Date: 2001/07/02 20:55:29 $
|
||||||
* $Revision: 1.35 $
|
* $Revision: 1.36 $
|
||||||
*
|
*
|
||||||
*****************************************************************************************
|
*****************************************************************************************
|
||||||
*/
|
*/
|
||||||
@ -240,7 +240,7 @@ import com.ibm.text.resources.ResourceReader;
|
|||||||
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
* <p>Copyright © IBM Corporation 1999. All rights reserved.
|
||||||
*
|
*
|
||||||
* @author Alan Liu
|
* @author Alan Liu
|
||||||
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.35 $ $Date: 2001/06/29 22:50:25 $
|
* @version $RCSfile: Transliterator.java,v $ $Revision: 1.36 $ $Date: 2001/07/02 20:55:29 $
|
||||||
*/
|
*/
|
||||||
public abstract class Transliterator {
|
public abstract class Transliterator {
|
||||||
/**
|
/**
|
||||||
@ -1100,6 +1100,8 @@ public abstract class Transliterator {
|
|||||||
LowercaseTransliterator.register();
|
LowercaseTransliterator.register();
|
||||||
UppercaseTransliterator.register();
|
UppercaseTransliterator.register();
|
||||||
TitlecaseTransliterator.register();
|
TitlecaseTransliterator.register();
|
||||||
|
UnicodeNameTransliterator.register();
|
||||||
|
NameUnicodeTransliterator.register();
|
||||||
NormalizationTransliterator.register();
|
NormalizationTransliterator.register();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
87
icu4j/src/com/ibm/text/UnicodeNameTransliterator.java
Executable file
87
icu4j/src/com/ibm/text/UnicodeNameTransliterator.java
Executable file
@ -0,0 +1,87 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 1996-2001, International Business Machines Corporation and
|
||||||
|
* others. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/text/Attic/UnicodeNameTransliterator.java,v $
|
||||||
|
* $Date: 2001/07/02 20:55:29 $
|
||||||
|
* $Revision: 1.1 $
|
||||||
|
*/
|
||||||
|
package com.ibm.text;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A transliterator that performs character to name mapping.
|
||||||
|
* @author Alan Liu
|
||||||
|
*/
|
||||||
|
public class UnicodeNameTransliterator extends Transliterator {
|
||||||
|
|
||||||
|
char openDelimiter;
|
||||||
|
char closeDelimiter;
|
||||||
|
|
||||||
|
static final String _ID = "Any-Name";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* System registration hook.
|
||||||
|
*/
|
||||||
|
static void register() {
|
||||||
|
Transliterator.registerFactory(_ID, new Transliterator.Factory() {
|
||||||
|
public Transliterator getInstance() {
|
||||||
|
return new UnicodeNameTransliterator(null);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a transliterator.
|
||||||
|
*/
|
||||||
|
public UnicodeNameTransliterator(char openDelimiter, char closeDelimiter,
|
||||||
|
UnicodeFilter filter) {
|
||||||
|
super(_ID, filter);
|
||||||
|
this.openDelimiter = openDelimiter;
|
||||||
|
this.closeDelimiter = closeDelimiter;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a transliterator with the default delimiters '{' and
|
||||||
|
* '}'.
|
||||||
|
*/
|
||||||
|
public UnicodeNameTransliterator(UnicodeFilter filter) {
|
||||||
|
this('{', '}', filter);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implements {@link Transliterator#handleTransliterate}.
|
||||||
|
*/
|
||||||
|
protected void handleTransliterate(Replaceable text,
|
||||||
|
Position offsets, boolean isIncremental) {
|
||||||
|
int cursor = offsets.start;
|
||||||
|
int limit = offsets.limit;
|
||||||
|
|
||||||
|
UnicodeFilter filt = getFilter();
|
||||||
|
StringBuffer str = new StringBuffer();
|
||||||
|
str.append(openDelimiter);
|
||||||
|
int len;
|
||||||
|
String name;
|
||||||
|
|
||||||
|
while (cursor < limit) {
|
||||||
|
char c = text.charAt(cursor);
|
||||||
|
if ((filt == null || filt.contains(c)) &&
|
||||||
|
(name=UCharacter.getName(c)) != null) {
|
||||||
|
|
||||||
|
str.setLength(1);
|
||||||
|
str.append(name).append(closeDelimiter);
|
||||||
|
|
||||||
|
text.replace(cursor, cursor+1, str.toString());
|
||||||
|
len = str.length();
|
||||||
|
cursor += len; // advance cursor by 1 and adjust for new text
|
||||||
|
limit += len-1; // change in length is (len - 1)
|
||||||
|
} else {
|
||||||
|
++cursor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
offsets.contextLimit += limit - offsets.limit;
|
||||||
|
offsets.limit = limit;
|
||||||
|
offsets.start = cursor;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user