ICU-3289 defer static initialization to shorten transliterator service startup time

X-SVN-Rev: 14548
This commit is contained in:
Alan Liu 2004-02-20 00:16:41 +00:00
parent dc20632138
commit db4e20d1e4
2 changed files with 59 additions and 34 deletions

View File

@ -13,7 +13,7 @@ import com.ibm.icu.lang.*;
/** /**
* @author Alan Liu * @author Alan Liu
* @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.21 $ $Date: 2003/12/20 03:07:10 $ * @version $RCSfile: NormalizationTransliterator.java,v $ $Revision: 1.22 $ $Date: 2004/02/20 00:16:34 $
*/ */
final class NormalizationTransliterator extends Transliterator { final class NormalizationTransliterator extends Transliterator {
@ -46,8 +46,8 @@ final class NormalizationTransliterator extends Transliterator {
D = 0, C = 1, KD= 2, KC = 3; D = 0, C = 1, KD= 2, KC = 3;
// Instance data, simply pointer to one of the sets below // Instance data, simply pointer to one of the sets below
final UnicodeSet UNSAFE_START; final UnicodeSet unsafeStart;
final UnicodeSet SKIPPABLE; final UnicodeSet skippable;
/** /**
* System registration hook. * System registration hook.
@ -130,8 +130,11 @@ final class NormalizationTransliterator extends Transliterator {
super(id, null); super(id, null);
mode = m; mode = m;
options = opt; options = opt;
UNSAFE_START = UNSAFE_STARTS[startChoice]; if (UNSAFE_STARTS[startChoice] == null) {
SKIPPABLE = SKIPPABLES[startChoice]; initStatics(startChoice);
}
unsafeStart = UNSAFE_STARTS[startChoice];
skippable = SKIPPABLES[startChoice];
} }
/** /**
@ -158,7 +161,7 @@ final class NormalizationTransliterator extends Transliterator {
int cp; int cp;
for (int i = start+1; i < limit; i += UTF16.getCharCount(cp)) { for (int i = start+1; i < limit; i += UTF16.getCharCount(cp)) {
cp = text.char32At(i); cp = text.char32At(i);
if (UCharacter.getCombiningClass(cp) == 0 && !UNSAFE_START.contains(cp)) { if (UCharacter.getCombiningClass(cp) == 0 && !unsafeStart.contains(cp)) {
int delta = convert(text, lastSafe, i, null); int delta = convert(text, lastSafe, i, null);
i += delta; i += delta;
limit += delta; limit += delta;
@ -171,8 +174,8 @@ final class NormalizationTransliterator extends Transliterator {
overallDelta += delta; overallDelta += delta;
lastSafe = limit + delta; lastSafe = limit + delta;
} else { } else {
// We are incremental, so accept the last characters IF they turn into SKIPPABLEs // We are incremental, so accept the last characters IF they turn into skippables
int delta = convert(text, lastSafe, limit, SKIPPABLE); int delta = convert(text, lastSafe, limit, skippable);
if (delta != Integer.MIN_VALUE) { if (delta != Integer.MIN_VALUE) {
overallDelta += delta; overallDelta += delta;
lastSafe = limit + delta; lastSafe = limit + delta;
@ -202,7 +205,7 @@ final class NormalizationTransliterator extends Transliterator {
// verify OK, if specified // verify OK, if specified
if (verify != null) { if (verify != null) {
boolean skip = !SKIPPABLE.containsAll(output); boolean skip = !skippable.containsAll(output);
if (DEBUG) { if (DEBUG) {
System.out.println((skip ? " SKIP: " : "NOSKIP: ") System.out.println((skip ? " SKIP: " : "NOSKIP: ")
+ com.ibm.icu.impl.Utility.escape(input) + com.ibm.icu.impl.Utility.escape(input)
@ -220,18 +223,16 @@ final class NormalizationTransliterator extends Transliterator {
private char buffer[] = new char[30]; private char buffer[] = new char[30];
static { /**
UNSAFE_STARTS[D] = new UnicodeSet("[\u0F73\u0F75\u0F81]", false); * Initialize statics for the given mode. This is slow, so we
UNSAFE_STARTS[C] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6" * defer it.
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2]", false); */
UNSAFE_STARTS[KD] = new UnicodeSet("[\u0F73\u0F75\u0F81\uFF9E-\uFF9F]", false); private static final void initStatics(int startChoice) {
UNSAFE_STARTS[KC] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6" switch (startChoice) {
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2\u3133\u3135-\u3136" case D:
+ "\u313A-\u313F\u314F-\u3163\uFF9E-\uFF9F\uFFA3\uFFA5-\uFFA6\uFFAA-\uFFAF\uFFC2-\uFFC7\uFFCA-\uFFCF" UNSAFE_STARTS[D] = new UnicodeSet("[\u0F73\u0F75\u0F81]", false);
+ "\uFFD2-\uFFD7\uFFDA-\uFFDC]", false); SKIPPABLES[D] = new UnicodeSet(
"[^\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD"
SKIPPABLES[D] = new UnicodeSet(
"[^\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD"
+ "\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD" + "\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD"
+ "\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137" + "\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137"
+ "\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165" + "\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165"
@ -283,9 +284,12 @@ final class NormalizationTransliterator extends Transliterator {
+ "\\U0001D16D-\\U0001D172\\U0001D17B-\\U0001D182\\U0001D185-" + "\\U0001D16D-\\U0001D172\\U0001D17B-\\U0001D182\\U0001D185-"
+ "\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0002" + "\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0002"
+ "F800-\\U0002FA1D]", false); + "F800-\\U0002FA1D]", false);
break;
SKIPPABLES[C] = new UnicodeSet( case C:
"[^<->A-PR-Za-pr-z\\u00A8\\u00C0-\\u00CF\\u00D1-\\u00D6\\u00D8-" UNSAFE_STARTS[C] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6"
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2]", false);
SKIPPABLES[C] = new UnicodeSet(
"[^<->A-PR-Za-pr-z\\u00A8\\u00C0-\\u00CF\\u00D1-\\u00D6\\u00D8-"
+ "\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD\\u00FF-" + "\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD\\u00FF-"
+ "\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121\\u0124-" + "\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121\\u0124-"
+ "\\u0125\\u0128-\\u012D\\u0130\\u0139-\\u013A\\u013D-\\u013E" + "\\u0125\\u0128-\\u012D\\u0130\\u0139-\\u013A\\u013D-\\u013E"
@ -390,9 +394,11 @@ final class NormalizationTransliterator extends Transliterator {
+ "\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172\\U0001D17B-" + "\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001D172\\U0001D17B-"
+ "\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001" + "\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001"
+ "D1BB-\\U0001D1C0\\U0002F800-\\U0002FA1D]", false); + "D1BB-\\U0001D1C0\\U0002F800-\\U0002FA1D]", false);
break;
SKIPPABLES[KD] = new UnicodeSet( case KD:
"[^\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5\\u00B8-\\u00BA" UNSAFE_STARTS[KD] = new UnicodeSet("[\u0F73\u0F75\u0F81\uFF9E-\uFF9F]", false);
SKIPPABLES[KD] = new UnicodeSet(
"[^\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5\\u00B8-\\u00BA"
+ "\\u00BC-\\u00BE\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6" + "\\u00BC-\\u00BE\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6"
+ "\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6" + "\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6"
+ "\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130" + "\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130"
@ -469,9 +475,14 @@ final class NormalizationTransliterator extends Transliterator {
+ "\\U0001D540-\\U0001D544\\U0001D546\\U0001D54A-\\U0001D550\\U0001" + "\\U0001D540-\\U0001D544\\U0001D546\\U0001D54A-\\U0001D550\\U0001"
+ "D552-\\U0001D6A3\\U0001D6A8-\\U0001D7C9\\U0001D7CE-\\U0001D7FF" + "D552-\\U0001D6A3\\U0001D6A8-\\U0001D7C9\\U0001D7CE-\\U0001D7FF"
+ "\\U0002F800-\\U0002FA1D]", false); + "\\U0002F800-\\U0002FA1D]", false);
break;
SKIPPABLES[KC] = new UnicodeSet( case KC:
"[^<->A-PR-Za-pr-z\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5" UNSAFE_STARTS[KC] = new UnicodeSet("[\u09BE\u09D7\u0B3E\u0B56-\u0B57\u0BBE\u0BD7\u0CC2\u0CD5-\u0CD6"
+ "\u0D3E\u0D57\u0DCF\u0DDF\u0F73\u0F75\u0F81\u102E\u1161-\u1175\u11A8-\u11C2\u3133\u3135-\u3136"
+ "\u313A-\u313F\u314F-\u3163\uFF9E-\uFF9F\uFFA3\uFFA5-\uFFA6\uFFAA-\uFFAF\uFFC2-\uFFC7\uFFCA-\uFFCF"
+ "\uFFD2-\uFFD7\uFFDA-\uFFDC]", false);
SKIPPABLES[KC] = new UnicodeSet(
"[^<->A-PR-Za-pr-z\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5"
+ "\\u00B8-\\u00BA\\u00BC-\\u00BE\\u00C0-\\u00CF\\u00D1-\\u00D6" + "\\u00B8-\\u00BA\\u00BC-\\u00BE\\u00C0-\\u00CF\\u00D1-\\u00D6"
+ "\\u00D8-\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD" + "\\u00D8-\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD"
+ "\\u00FF-\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121" + "\\u00FF-\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121"
@ -602,5 +613,7 @@ final class NormalizationTransliterator extends Transliterator {
+ "\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544\\U0001D546\\U0001" + "\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544\\U0001D546\\U0001"
+ "D54A-\\U0001D550\\U0001D552-\\U0001D6A3\\U0001D6A8-\\U0001D7C9" + "D54A-\\U0001D550\\U0001D552-\\U0001D6A3\\U0001D6A8-\\U0001D7C9"
+ "\\U0001D7CE-\\U0001D7FF\\U0002F800-\\U0002FA1D]", false); + "\\U0001D7CE-\\U0001D7FF\\U0002F800-\\U0002FA1D]", false);
break;
}
} }
} }

View File

@ -3,8 +3,8 @@
* others. All Rights Reserved. * others. All Rights Reserved.
* *
* $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TitlecaseTransliterator.java,v $ * $Source: /xsrl/Nsvn/icu/icu4j/src/com/ibm/icu/text/TitlecaseTransliterator.java,v $
* $Date: 2002/10/31 22:36:48 $ * $Date: 2004/02/20 00:16:41 $
* $Revision: 1.18 $ * $Revision: 1.19 $
*/ */
package com.ibm.icu.text; package com.ibm.icu.text;
import java.util.*; import java.util.*;
@ -27,13 +27,22 @@ class TitlecaseTransliterator extends Transliterator {
* The set of characters we skip. These are neither cased nor * The set of characters we skip. These are neither cased nor
* non-cased, to us; we copy them verbatim. * non-cased, to us; we copy them verbatim.
*/ */
static final UnicodeSet SKIP = new UnicodeSet("[\u00AD \u2019 \\' [:Mn:] [:Me:] [:Cf:] [:Lm:] [:Sk:]]"); static UnicodeSet SKIP = null;
/** /**
* The set of characters that cause the next non-SKIP character * The set of characters that cause the next non-SKIP character
* to be lowercased. * to be lowercased.
*/ */
static final UnicodeSet CASED = new UnicodeSet("[[:Lu:] [:Ll:] [:Lt:]]"); static UnicodeSet CASED = null;
/**
 * Initialize static variables.  We defer initialization because it
 * is slow (typically over 1000 ms).
 *
 * Both sets are built into locals before either static field is
 * assigned, and SKIP is assigned last.  Callers use
 * <code>SKIP == null</code> as the "not yet initialized" test, so
 * SKIP must never become non-null while CASED is still null: if
 * constructing either set throws, both fields stay null and the
 * next call simply retries the initialization.
 *
 * NOTE(review): the fields are not volatile and this method is not
 * synchronized, so the assignment order narrows, but does not
 * formally close, the window in which another thread could observe
 * a partially initialized state -- confirm whether callers need a
 * stronger guarantee.
 */
private static final void initStatics() {
    // Construct in the same order as before; publish nothing until
    // both constructions have succeeded.
    UnicodeSet skip = new UnicodeSet("[\u00AD \u2019 \\' [:Mn:] [:Me:] [:Cf:] [:Lm:] [:Sk:]]");
    UnicodeSet cased = new UnicodeSet("[[:Lu:] [:Ll:] [:Lt:]]");

    // Assign the guard field (SKIP) last.
    CASED = cased;
    SKIP = skip;
}
/** /**
* System registration hook. * System registration hook.
@ -63,6 +72,9 @@ class TitlecaseTransliterator extends Transliterator {
*/ */
protected void handleTransliterate(Replaceable text, protected void handleTransliterate(Replaceable text,
Position offsets, boolean incremental) { Position offsets, boolean incremental) {
if (SKIP == null) {
initStatics();
}
// Our mode; we are either converting letter toTitle or // Our mode; we are either converting letter toTitle or
// toLower. // toLower.