ICU-3289 defer static initialization to shorten transliterator service startup time

X-SVN-Rev: 14548
This commit is contained in:
Alan Liu 2004-02-20 00:16:41 +00:00
parent dc20632138
commit db4e20d1e4
2 changed files with 59 additions and 34 deletions

View File

@ -13,7 +13,7 @@ import com.ibm.icu.lang.*;
/**
* @author Alan Liu
* @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.21 $ $Date: 2003/12/20 03:07:10 $
* @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.22 $ $Date: 2004/02/20 00:16:34 $
*/
final class NormalizationTransliterator extends Transliterator {
@ -46,8 +46,8 @@ final class NormalizationTransliterator extends Transliterator {
D = 0, C = 1, KD= 2, KC = 3;
// Instance data, simply pointer to one of the sets below
final UnicodeSet UNSAFE_START;
final UnicodeSet SKIPPABLE;
final UnicodeSet unsafeStart;
final UnicodeSet skippable;
/**
* System registration hook.
@ -130,8 +130,11 @@ final class NormalizationTransliterator extends Transliterator {
super(id, null);
mode = m;
options = opt;
UNSAFE_START = UNSAFE_STARTS[startChoice];
SKIPPABLE = SKIPPABLES[startChoice];
if (UNSAFE_STARTS[startChoice] == null) {
initStatics(startChoice);
}
unsafeStart = UNSAFE_STARTS[startChoice];
skippable = SKIPPABLES[startChoice];
}
/**
@ -158,7 +161,7 @@ final class NormalizationTransliterator extends Transliterator {
int cp;
for (int i = start+1; i < limit; i += UTF16.getCharCount(cp)) {
cp = text.char32At(i);
if (UCharacter.getCombiningClass(cp) == 0 && !UNSAFE_START.contains(cp)) {
if (UCharacter.getCombiningClass(cp) == 0 && !unsafeStart.contains(cp)) {
int delta = convert(text, lastSafe, i, null);
i += delta;
limit += delta;
@ -171,8 +174,8 @@ final class NormalizationTransliterator extends Transliterator {
overallDelta += delta;
lastSafe = limit + delta;
} else {
// We are incremental, so accept the last characters IF they turn into SKIPPABLEs
int delta = convert(text, lastSafe, limit, SKIPPABLE);
// We are incremental, so accept the last characters IF they turn into skippables
int delta = convert(text, lastSafe, limit, skippable);
if (delta != Integer.MIN_VALUE) {
overallDelta += delta;
lastSafe = limit + delta;
@ -202,7 +205,7 @@ final class NormalizationTransliterator extends Transliterator {
// verify OK, if specified
if (verify != null) {
boolean skip = !SKIPPABLE.containsAll(output);
boolean skip = !skippable.containsAll(output);
if (DEBUG) {
System.out.println((skip ? " SKIP: " : "NOSKIP: ")
+ com.ibm.icu.impl.Utility.escape(input)
@ -220,18 +223,16 @@ final class NormalizationTransliterator extends Transliterator {
private char buffer[] = new char[30];
static {
UNSAFE_STARTS[D] = new UnicodeSet("[\u0F73\u0F75\u0F81]", false);
UNSAFE_STARTS[C] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6"
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2]", false);
UNSAFE_STARTS[KD] = new UnicodeSet("[\u0F73\u0F75\u0F81\uFF9E-\uFF9F]", false);
UNSAFE_STARTS[KC] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6"
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2\u3133\u3135-\u3136"
+ "\u313A-\u313F\u314F-\u3163\uFF9E-\uFF9F\uFFA3\uFFA5-\uFFA6\uFFAA-\uFFAF\uFFC2-\uFFC7\uFFCA-\uFFCF"
+ "\uFFD2-\uFFD7\uFFDA-\uFFDC]", false);
SKIPPABLES[D] = new UnicodeSet(
"[^\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD"
/**
* Initialize statics for the given mode. This is slow, so we
* defer it.
*/
private static final void initStatics(int startChoice) {
switch (startChoice) {
case D:
UNSAFE_STARTS[D] = new UnicodeSet("[\u0F73\u0F75\u0F81]", false);
SKIPPABLES[D] = new UnicodeSet(
"[^\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD"
+ "\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD"
+ "\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137"
+ "\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165"
@ -283,9 +284,12 @@ final class NormalizationTransliterator extends Transliterator {
+ "\\U0001D16D-\\U0001D172\\U0001D17B-\\U0001D182\\U0001D185-"
+ "\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0002"
+ "F800-\\U0002FA1D]", false);
SKIPPABLES[C] = new UnicodeSet(
"[^<->A-PR-Za-pr-z\\u00A8\\u00C0-\\u00CF\\u00D1-\\u00D6\\u00D8-"
break;
case C:
UNSAFE_STARTS[C] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6"
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2]", false);
SKIPPABLES[C] = new UnicodeSet(
"[^<->A-PR-Za-pr-z\\u00A8\\u00C0-\\u00CF\\u00D1-\\u00D6\\u00D8-"
+ "\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD\\u00FF-"
+ "\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121\\u0124-"
+ "\\u0125\\u0128-\\u012D\\u0130\\u0139-\\u013A\\u013D-\\u013E"
@ -390,9 +394,11 @@ final class NormalizationTransliterator extends Transliterator {
+ "\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172\\U0001D17B-"
+ "\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001"
+ "D1BB-\\U0001D1C0\\U0002F800-\\U0002FA1D]", false);
SKIPPABLES[KD] = new UnicodeSet(
"[^\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5\\u00B8-\\u00BA"
break;
case KD:
UNSAFE_STARTS[KD] = new UnicodeSet("[\u0F73\u0F75\u0F81\uFF9E-\uFF9F]", false);
SKIPPABLES[KD] = new UnicodeSet(
"[^\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5\\u00B8-\\u00BA"
+ "\\u00BC-\\u00BE\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6"
+ "\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6"
+ "\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130"
@ -469,9 +475,14 @@ final class NormalizationTransliterator extends Transliterator {
+ "\\U0001D540-\\U0001D544\\U0001D546\\U0001D54A-\\U0001D550\\U0001"
+ "D552-\\U0001D6A3\\U0001D6A8-\\U0001D7C9\\U0001D7CE-\\U0001D7FF"
+ "\\U0002F800-\\U0002FA1D]", false);
SKIPPABLES[KC] = new UnicodeSet(
"[^<->A-PR-Za-pr-z\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5"
break;
case KC:
UNSAFE_STARTS[KC] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6"
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2\u3133\u3135-\u3136"
+ "\u313A-\u313F\u314F-\u3163\uFF9E-\uFF9F\uFFA3\uFFA5-\uFFA6\uFFAA-\uFFAF\uFFC2-\uFFC7\uFFCA-\uFFCF"
+ "\uFFD2-\uFFD7\uFFDA-\uFFDC]", false);
SKIPPABLES[KC] = new UnicodeSet(
"[^<->A-PR-Za-pr-z\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5"
+ "\\u00B8-\\u00BA\\u00BC-\\u00BE\\u00C0-\\u00CF\\u00D1-\\u00D6"
+ "\\u00D8-\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD"
+ "\\u00FF-\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121"
@ -602,5 +613,7 @@ final class NormalizationTransliterator extends Transliterator {
+ "\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544\\U0001D546\\U0001"
+ "D54A-\\U0001D550\\U0001D552-\\U0001D6A3\\U0001D6A8-\\U0001D7C9"
+ "\\U0001D7CE-\\U0001D7FF\\U0002F800-\\U0002FA1D]", false);
break;
}
}
}

View File

@ -3,8 +3,8 @@
* others. All Rights Reserved.
*
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TitlecaseTransliterator.java,v $
* $Date: 2002/10/31 22:36:48 $
* $Revision: 1.18 $
* $Date: 2004/02/20 00:16:41 $
* $Revision: 1.19 $
*/
package com.ibm.icu.text;
import java.util.*;
@ -27,13 +27,22 @@ class TitlecaseTransliterator extends Transliterator {
* The set of characters we skip. These are neither cased nor
* non-cased, to us; we copy them verbatim.
*/
static final UnicodeSet SKIP = new UnicodeSet("[\u00AD \u2019 \\' [:Mn:] [:Me:] [:Cf:] [:Lm:] [:Sk:]]");
static UnicodeSet SKIP = null;
/**
* The set of characters that cause the next non-SKIP character
* to be lowercased.
*/
static final UnicodeSet CASED = new UnicodeSet("[[:Lu:] [:Ll:] [:Lt:]]");
static UnicodeSet CASED = null;
/**
* Initialize static variables. We defer intilization because it
* is slow (typically over 1000 ms).
*/
private static final void initStatics() {
SKIP = new UnicodeSet("[\u00AD \u2019 \\' [:Mn:] [:Me:] [:Cf:] [:Lm:] [:Sk:]]");
CASED = new UnicodeSet("[[:Lu:] [:Ll:] [:Lt:]]");
}
/**
* System registration hook.
@ -63,6 +72,9 @@ class TitlecaseTransliterator extends Transliterator {
*/
protected void handleTransliterate(Replaceable text,
Position offsets, boolean incremental) {
if (SKIP == null) {
initStatics();
}
// Our mode; we are either converting letter toTitle or
// toLower.