fix problem with implicits
X-SVN-Rev: 14310
This commit is contained in:
parent
fb11ca2159
commit
9e3dba41a9
278
tools/unicodetools/com/ibm/text/UCA/Implicit.java
Normal file
278
tools/unicodetools/com/ibm/text/UCA/Implicit.java
Normal file
@ -0,0 +1,278 @@
|
||||
package com.ibm.text.UCA;
|
||||
|
||||
import com.ibm.text.utility.Utility;
|
||||
|
||||
/**
|
||||
* For generation of Implicit CEs
|
||||
* @author Davis
|
||||
*
|
||||
* Cleaned up so that changes can be made more easily.
|
||||
* Old values:
|
||||
# First Implicit: E26A792D
|
||||
# Last Implicit: E3DC70C0
|
||||
# First CJK: E0030300
|
||||
# Last CJK: E0A9DD00
|
||||
# First CJK_A: E0A9DF00
|
||||
# Last CJK_A: E0DE3100
|
||||
|
||||
*/
|
||||
public class Implicit {
|
||||
|
||||
/**
|
||||
* constants
|
||||
*/
|
||||
static final boolean DEBUG = false;
|
||||
|
||||
static final long topByte = 0xFF000000L;
|
||||
static final long bottomByte = 0xFFL;
|
||||
static final long fourBytes = 0xFFFFFFFFL;
|
||||
|
||||
static final int MAX_INPUT = 0x21FFFF;
|
||||
|
||||
/**
|
||||
* Testing function
|
||||
* @param args ignored
|
||||
*/
|
||||
public static void main(String[] args) {
|
||||
System.out.println("Start");
|
||||
try {
|
||||
Implicit foo = new Implicit(0xE0, 0xE4);
|
||||
int gap4 = foo.getGap4();
|
||||
int gap3 = foo.getGap3();
|
||||
int minTrail = foo.getMinTrail();
|
||||
int maxTrail = foo.getMaxTrail();
|
||||
long last = 0;
|
||||
long current;
|
||||
for (int i = 0; i <= MAX_INPUT; ++i) {
|
||||
current = foo.getImplicit(i) & fourBytes;
|
||||
long lastBottom = last & bottomByte;
|
||||
long currentBottom = current & bottomByte;
|
||||
long lastTop = last & topByte;
|
||||
long currentTop = current & topByte;
|
||||
|
||||
// do some consistency checks
|
||||
long gap = current - last;
|
||||
if (currentBottom != 0) { // if we are a 4-byte
|
||||
// gap has to be at least gap4
|
||||
// and gap from minTrail, maxTrail has to be at least gap4
|
||||
if (gap <= gap4) foo.throwError("Failed gap4 between", i);
|
||||
if (currentBottom < minTrail + gap4) foo.throwError("Failed gap4 before", i);
|
||||
if (currentBottom > maxTrail - gap4) foo.throwError("Failed gap4 after", i);
|
||||
} else { // we are a three-byte
|
||||
gap = gap >> 8; // move gap down for comparison.
|
||||
long current3Bottom = (current >> 8) & bottomByte;
|
||||
if (gap <= gap3) foo.throwError("Failed gap3 between ", i);
|
||||
if (current3Bottom < minTrail + gap3) foo.throwError("Failed gap3 before", i);
|
||||
if (current3Bottom > maxTrail - gap3) foo.throwError("Failed gap3 after", i);
|
||||
}
|
||||
// print out some values for spot-checking
|
||||
if (lastTop != currentTop || i == 0x10000 || i == 0x110000) {
|
||||
foo.show(i-3);
|
||||
foo.show(i-2);
|
||||
foo.show(i-1);
|
||||
if (i == 0) {
|
||||
// do nothing
|
||||
} else if (lastBottom == 0 && currentBottom != 0) {
|
||||
System.out.println("+ primary boundary, 4-byte CE's below");
|
||||
} else if (lastTop != currentTop) {
|
||||
System.out.println("+ primary boundary");
|
||||
}
|
||||
foo.show(i);
|
||||
foo.show(i+1);
|
||||
foo.show(i+2);
|
||||
System.out.println("...");
|
||||
}
|
||||
last = current;
|
||||
}
|
||||
foo.show(MAX_INPUT-2);
|
||||
foo.show(MAX_INPUT-1);
|
||||
foo.show(MAX_INPUT);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
System.out.println("End");
|
||||
}
|
||||
}
|
||||
|
||||
private void throwError(String title, int i) {
|
||||
throw new IllegalArgumentException(title + "\t" + Utility.hex(i) + "\t" + Utility.hex(getImplicit(i) & fourBytes));
|
||||
}
|
||||
|
||||
private void show(int i) {
|
||||
if (i >= 0 && i <= MAX_INPUT) {
|
||||
System.out.println(Utility.hex(i) + "\t" + Utility.hex(getImplicit(i) & fourBytes));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Precomputed by constructor
|
||||
*/
|
||||
int final3Multiplier;
|
||||
int final4Multiplier;
|
||||
int final3Count;
|
||||
int final4Count;
|
||||
int medialCount;
|
||||
int min3Primary;
|
||||
int min4Primary;
|
||||
int max4Primary;
|
||||
int minTrail;
|
||||
int maxTrail;
|
||||
int min4Boundary;
|
||||
|
||||
public int getGap4() {
|
||||
return final4Multiplier - 1;
|
||||
}
|
||||
|
||||
public int getGap3() {
|
||||
return final3Multiplier - 1;
|
||||
}
|
||||
|
||||
// old comment
|
||||
// we must skip all 00, 01, 02 bytes, so most bytes have 253 values
|
||||
// we must leave a gap of 01 between all values of the last byte, so the last byte has 126 values (3 byte case)
|
||||
// we shift so that HAN all has the same first primary, for compression.
|
||||
// for the 4 byte case, we make the gap as large as we can fit.
|
||||
// Three byte forms are EC xx xx, ED xx xx, EE xx xx (with a gap of 1)
|
||||
// Four byte forms (most supplementaries) are EF xx xx xx (with a gap of LAST2_MULTIPLIER == 14)
|
||||
|
||||
/**
|
||||
* Supply parameters for generating implicit CEs
|
||||
*/
|
||||
public Implicit(int minPrimary, int maxPrimary) {
|
||||
// 13 is the largest 4-byte gap we can use without getting 2 four-byte forms.
|
||||
this(minPrimary, maxPrimary, 0x04, 0xFE, 1, 15);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up to generate implicits.
|
||||
* @param minPrimary
|
||||
* @param maxPrimary
|
||||
* @param minTrail final byte
|
||||
* @param maxTrail final byte
|
||||
* @param gap3 the gap we leave for tailoring for 3-byte forms
|
||||
* @param gap4 the gap we leave for tailoring for 4-byte forms
|
||||
*/
|
||||
public Implicit(int minPrimary, int maxPrimary, int minTrail, int maxTrail, int gap3, int gap4) {
|
||||
// some simple parameter checks
|
||||
if (minPrimary < 0 || minPrimary >= maxPrimary || maxPrimary > 0xFF) throw new IllegalArgumentException("bad lead bytes");
|
||||
if (minTrail < 0 || minTrail >= maxTrail || maxTrail > 0xFF) throw new IllegalArgumentException("bad trail bytes");
|
||||
if (gap3 < 1 || gap4 < 1) throw new IllegalArgumentException("must have larger gaps");
|
||||
|
||||
this.minTrail = minTrail;
|
||||
this.maxTrail = maxTrail;
|
||||
|
||||
final3Multiplier = gap3 + 1;
|
||||
final4Multiplier = gap4 + 1;
|
||||
min3Primary = minPrimary;
|
||||
max4Primary = maxPrimary;
|
||||
// compute constants for use later.
|
||||
// number of values we can use in trailing bytes
|
||||
// leave room for empty values below, between, AND above, so
|
||||
// gap = 2:
|
||||
// range 3..7 => (3,4) 5 (6,7): so 1 value
|
||||
// range 3..8 => (3,4) 5 (6,7,8): so 1 value
|
||||
// range 3..9 => (3,4) 5 (6,7,8,9): so 1 value
|
||||
// range 3..10 => (3,4) 5 (6,7) 8 (9, 10): so 2 values
|
||||
final3Count = 1 + (maxTrail - minTrail - 1) / final3Multiplier;
|
||||
final4Count = 1 + (maxTrail - minTrail - 1) / final4Multiplier;
|
||||
// medials can use full range
|
||||
medialCount = (maxTrail - minTrail + 1);
|
||||
// find out how many values fit in each form
|
||||
int fourByteCount = medialCount * medialCount * final4Count;
|
||||
int threeByteCount = medialCount * final3Count;
|
||||
// now determine where the 3/4 boundary is.
|
||||
// we use 3 bytes below the boundary, and 4 above
|
||||
int primariesAvailable = maxPrimary - minPrimary + 1;
|
||||
int min4BytesNeeded = divideAndRoundUp(MAX_INPUT, fourByteCount);
|
||||
int min3BytesNeeded = primariesAvailable - min4BytesNeeded;
|
||||
if (min3BytesNeeded < 1) throw new IllegalArgumentException("Too few 3-byte implicits available.");
|
||||
int min3ByteCoverage = min3BytesNeeded * threeByteCount;
|
||||
min4Primary = minPrimary + min3BytesNeeded;
|
||||
min4Boundary = min3ByteCoverage;
|
||||
// Now expand out the multiplier for the 4 bytes, and redo.
|
||||
int totalNeeded = MAX_INPUT - min4Boundary;
|
||||
int neededPerPrimaryByte = divideAndRoundUp(totalNeeded, min4BytesNeeded);
|
||||
if (DEBUG) System.out.println("neededPerPrimaryByte: " + neededPerPrimaryByte);
|
||||
int neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCount * medialCount);
|
||||
if (DEBUG) System.out.println("neededPerFinalByte: " + neededPerFinalByte);
|
||||
int expandedGap = (maxTrail - minTrail - 1) / (neededPerFinalByte + 1) - 1;
|
||||
if (DEBUG) System.out.println("expandedGap: " + expandedGap);
|
||||
if (expandedGap < gap4) throw new IllegalArgumentException("must have larger gaps");
|
||||
final4Multiplier = expandedGap + 1;
|
||||
final4Count = neededPerFinalByte;
|
||||
if (DEBUG) {
|
||||
System.out.println("final4Count: " + final4Count);
|
||||
for (int counter = 0; counter <= final4Count; ++counter) {
|
||||
int value = minTrail + (1 + counter)*final4Multiplier;
|
||||
System.out.println(counter + "\t" + value + "\t" + Utility.hex(value));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static public int divideAndRoundUp(int a, int b) {
|
||||
return 1 + (a-1)/b;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate the implicit CE, left shifted to put the first byte at the top of an int.
|
||||
* @param cp code point
|
||||
* @return
|
||||
*/
|
||||
public int getImplicit(int cp) {
|
||||
if (cp < 0 || cp > MAX_INPUT) {
|
||||
throw new IllegalArgumentException("Code point out of range " + Utility.hex(cp));
|
||||
}
|
||||
int last0 = cp - min4Boundary;
|
||||
if (last0 < 0) {
|
||||
int last1 = cp / final3Count;
|
||||
last0 = cp % final3Count;
|
||||
|
||||
int last2 = last1 / medialCount;
|
||||
last1 %= medialCount;
|
||||
|
||||
last0 = minTrail + (last0 + 1)*final3Multiplier - 1; // spread out, leaving gap at start
|
||||
last1 = minTrail + last1; // offset
|
||||
last2 = min3Primary + last2; // offset
|
||||
|
||||
if (last2 >= min4Primary) {
|
||||
throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last2));
|
||||
}
|
||||
|
||||
return (last2 << 24) + (last1 << 16) + (last0 << 8);
|
||||
} else {
|
||||
int last1 = last0 / final4Count;
|
||||
last0 %= final4Count;
|
||||
|
||||
int last2 = last1 / medialCount;
|
||||
last1 %= medialCount;
|
||||
|
||||
int last3 = last2 / medialCount;
|
||||
last2 %= medialCount;
|
||||
|
||||
last0 = minTrail + (last0 + 1)*final4Multiplier - 1; // spread out, leaving gap at start
|
||||
last1 = minTrail + last1; // offset
|
||||
last2 = minTrail + last2; // offset
|
||||
last3 = min4Primary + last3; // offset
|
||||
|
||||
if (last3 > max4Primary) {
|
||||
throw new IllegalArgumentException("4-byte out of range: " + Utility.hex(cp) + ", " + Utility.hex(last3));
|
||||
}
|
||||
|
||||
return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* @return
|
||||
*/
|
||||
public int getMinTrail() {
|
||||
return minTrail;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return
|
||||
*/
|
||||
public int getMaxTrail() {
|
||||
return maxTrail;
|
||||
}
|
||||
|
||||
}
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/Main.java,v $
|
||||
* $Date: 2003/08/22 16:51:21 $
|
||||
* $Revision: 1.16 $
|
||||
* $Date: 2004/01/13 18:32:12 $
|
||||
* $Revision: 1.17 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -17,7 +17,7 @@ import com.ibm.text.utility.*;
|
||||
|
||||
|
||||
public class Main {
|
||||
static final String UCDVersion = "";
|
||||
static final String UCDVersion = "4.0.0";
|
||||
static final String[] ICU_FILES = {"writeCollationValidityLog", "writeFractionalUCA",
|
||||
"WriteRules", "WriteRulesXML", "writeconformance", "writeconformanceshifted",
|
||||
"short",
|
||||
@ -89,7 +89,7 @@ public class Main {
|
||||
else if (arg.equalsIgnoreCase("noCE")) noCE = !noCE;
|
||||
|
||||
else if (arg.equalsIgnoreCase("writeAllocation")) WriteCharts.writeAllocation();
|
||||
else if (arg.equalsIgnoreCase("probe")) Probe.test();
|
||||
// else if (arg.equalsIgnoreCase("probe")) Probe.test();
|
||||
|
||||
|
||||
else {
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/UCA_Types.java,v $
|
||||
* $Date: 2003/08/22 16:51:21 $
|
||||
* $Revision: 1.5 $
|
||||
* $Date: 2004/01/13 18:32:11 $
|
||||
* $Revision: 1.6 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -20,7 +20,7 @@ public interface UCA_Types {
|
||||
* Version of the UCA tables to use
|
||||
*/
|
||||
//private static final String VERSION = "-3.0.1d3"; // ""; // "-2.1.9d7";
|
||||
public static final String UCA_BASE = "4.0.0d5"; // "3.1.1"; // ; // ""; // "-2.1.9d7";
|
||||
public static final String UCA_BASE = "4.0.0"; // "3.1.1"; // ; // ""; // "-2.1.9d7";
|
||||
public static final String VERSION = "-" + UCA_BASE; // + "d6" ""; // "-2.1.9d7";
|
||||
public static final String ALLFILES = "allkeys"; // null if not there
|
||||
|
||||
|
@ -5,8 +5,8 @@
|
||||
*******************************************************************************
|
||||
*
|
||||
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/UCA/WriteCollationData.java,v $
|
||||
* $Date: 2003/08/22 16:51:21 $
|
||||
* $Revision: 1.35 $
|
||||
* $Date: 2004/01/13 18:32:11 $
|
||||
* $Revision: 1.36 $
|
||||
*
|
||||
*******************************************************************************
|
||||
*/
|
||||
@ -319,10 +319,10 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
}
|
||||
}
|
||||
|
||||
PrintWriter log = Utility.openPrintWriter(UCA_GEN_DIR, filename + (shortPrint ? "_SHORT" : "") + ".txt", Utility.UTF8_WINDOWS);
|
||||
String fullFileName = filename + (shortPrint ? "_SHORT" : "") + ".txt";
|
||||
PrintWriter log = Utility.openPrintWriter(UCA_GEN_DIR, fullFileName, Utility.UTF8_WINDOWS);
|
||||
//if (!shortPrint) log.write('\uFEFF');
|
||||
log.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
|
||||
log.println("# Generated: " + getNormalDate());
|
||||
writeVersionAndDate(log, fullFileName);
|
||||
|
||||
System.out.println("Sorting");
|
||||
int counter = 0;
|
||||
@ -448,6 +448,14 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
System.out.println("Done");
|
||||
}
|
||||
|
||||
private static void writeVersionAndDate(PrintWriter log, String filename) {
|
||||
log.println("# File: " + filename);
|
||||
log.println("# UCA Version: " + collator.getDataVersion());
|
||||
log.println("# UCD Version: " + collator.getDataVersion());
|
||||
log.println("# Generated: " + getNormalDate());
|
||||
log.println();
|
||||
}
|
||||
|
||||
static void addStringX(int x, byte option) {
|
||||
addStringX(UTF32.valueOf32(x), option);
|
||||
}
|
||||
@ -703,7 +711,8 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
}*/
|
||||
|
||||
static void testCompatibilityCharacters() throws IOException {
|
||||
log = Utility.openPrintWriter(UCA_GEN_DIR, "UCA_CompatComparison.txt", Utility.UTF8_WINDOWS);
|
||||
String fullFileName = "UCA_CompatComparison.txt";
|
||||
log = Utility.openPrintWriter(UCA_GEN_DIR, fullFileName, Utility.UTF8_WINDOWS);
|
||||
|
||||
int[] kenCes = new int[50];
|
||||
int[] markCes = new int[50];
|
||||
@ -750,8 +759,9 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
Iterator it = forLater.keySet().iterator();
|
||||
byte oldType = (byte)0xFF; // anything unique
|
||||
int caseCount = 0;
|
||||
log.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
|
||||
log.println("Generated: " + getNormalDate());
|
||||
writeVersionAndDate(log, fullFileName);
|
||||
//log.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
|
||||
//log.println("Generated: " + getNormalDate());
|
||||
while (it.hasNext()) {
|
||||
String key = (String) it.next();
|
||||
byte type = (byte)key.charAt(0);
|
||||
@ -1197,7 +1207,8 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
"UTF8"),
|
||||
32*1024));
|
||||
*/
|
||||
PrintWriter diLog = Utility.openPrintWriter(UCA_GEN_DIR, "UCA_Contractions.txt", Utility.UTF8_WINDOWS);
|
||||
String fullFileName = "UCA_Contractions.txt";
|
||||
PrintWriter diLog = Utility.openPrintWriter(UCA_GEN_DIR, fullFileName, Utility.UTF8_WINDOWS);
|
||||
|
||||
diLog.write('\uFEFF');
|
||||
|
||||
@ -1209,8 +1220,9 @@ U+01D5 LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||
int[] lenArray = new int[1];
|
||||
|
||||
diLog.println("# Contractions");
|
||||
diLog.println("# Generated " + getNormalDate());
|
||||
diLog.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
|
||||
writeVersionAndDate(diLog, fullFileName);
|
||||
//diLog.println("# Generated " + getNormalDate());
|
||||
//diLog.println("# UCA Version: " + collator.getDataVersion() + "/" + collator.getUCDVersion());
|
||||
while (true) {
|
||||
String s = cc.next(ces, lenArray);
|
||||
if (s == null) break;
|
||||
@ -3195,8 +3207,8 @@ F900..FAFF; CJK Compatibility Ideographs
|
||||
|
||||
if (firstTrailing.isUnset()) {
|
||||
System.out.println("No first/last trailing: resetting");
|
||||
firstTrailing.setValue(IMPLICIT_LIMIT_BYTE+1, COMMON, COMMON, "");
|
||||
lastTrailing.setValue(IMPLICIT_LIMIT_BYTE+1, COMMON, COMMON, "");
|
||||
firstTrailing.setValue(IMPLICIT_MAX_BYTE+1, COMMON, COMMON, "");
|
||||
lastTrailing.setValue(IMPLICIT_MAX_BYTE+1, COMMON, COMMON, "");
|
||||
System.out.println(firstTrailing.formatFCE());
|
||||
}
|
||||
|
||||
@ -3208,8 +3220,8 @@ F900..FAFF; CJK Compatibility Ideographs
|
||||
|
||||
log.println("# superceded! [top " + lastNonIgnorable.formatFCE() + "]");
|
||||
log.println("[fixed first implicit byte " + Utility.hex(IMPLICIT_BASE_BYTE,2) + "]");
|
||||
log.println("[fixed last implicit byte " + Utility.hex(IMPLICIT_LIMIT_BYTE,2) + "]");
|
||||
log.println("[fixed first trail byte " + Utility.hex(IMPLICIT_LIMIT_BYTE+1,2) + "]");
|
||||
log.println("[fixed last implicit byte " + Utility.hex(IMPLICIT_MAX_BYTE,2) + "]");
|
||||
log.println("[fixed first trail byte " + Utility.hex(IMPLICIT_MAX_BYTE+1,2) + "]");
|
||||
log.println("[fixed last trail byte " + Utility.hex(SPECIAL_BASE-1,2) + "]");
|
||||
log.println("[fixed first special byte " + Utility.hex(SPECIAL_BASE,2) + "]");
|
||||
log.println("[fixed last special byte " + Utility.hex(0xFF,2) + "]");
|
||||
@ -3509,7 +3521,7 @@ static int swapCJK(int i) {
|
||||
IMPLICIT_3BYTE_COUNT = 1,
|
||||
IMPLICIT_BASE_BYTE = 0xE0,
|
||||
|
||||
IMPLICIT_LIMIT_BYTE = IMPLICIT_BASE_BYTE + 4, // leave room for 1 3-byte and 2 4-byte forms
|
||||
IMPLICIT_MAX_BYTE = IMPLICIT_BASE_BYTE + 4, // leave room for 1 3-byte and 2 4-byte forms
|
||||
|
||||
IMPLICIT_4BYTE_BOUNDARY = IMPLICIT_3BYTE_COUNT * OTHER_COUNT * LAST_COUNT,
|
||||
LAST_MULTIPLIER = OTHER_COUNT / LAST_COUNT,
|
||||
@ -3534,49 +3546,10 @@ static int swapCJK(int i) {
|
||||
return getImplicitPrimaryFromSwapped(cp);
|
||||
}
|
||||
|
||||
static Implicit implicit = new Implicit(IMPLICIT_BASE_BYTE, IMPLICIT_MAX_BYTE);
|
||||
|
||||
static int getImplicitPrimaryFromSwapped(int cp) {
|
||||
|
||||
// we must skip all 00, 01, 02 bytes, so most bytes have 253 values
|
||||
// we must leave a gap of 01 between all values of the last byte, so the last byte has 126 values (3 byte case)
|
||||
// we shift so that HAN all has the same first primary, for compression.
|
||||
// for the 4 byte case, we make the gap as large as we can fit.
|
||||
// Three byte forms are EC xx xx, ED xx xx, EE xx xx (with a gap of 1)
|
||||
// Four byte forms (most supplementaries) are EF xx xx xx (with a gap of LAST2_MULTIPLIER == 14)
|
||||
|
||||
int last0 = cp - IMPLICIT_4BYTE_BOUNDARY;
|
||||
if (last0 < 0) {
|
||||
int last1 = cp / LAST_COUNT;
|
||||
last0 = cp % LAST_COUNT;
|
||||
|
||||
int last2 = last1 / OTHER_COUNT;
|
||||
last1 %= OTHER_COUNT;
|
||||
|
||||
if (DEBUG || last2 > 0xFF-BYTES_TO_AVOID) System.out.println("3B: " + Utility.hex(cp) + " => "
|
||||
+ Utility.hex(last2) + ", "
|
||||
+ Utility.hex(last1) + ", "
|
||||
+ Utility.hex(last0) + ", "
|
||||
);
|
||||
|
||||
return IMPLICIT_BASE_3BYTE + (last2 << 24) + (last1 << 16) + ((last0*LAST_MULTIPLIER) << 8);
|
||||
} else {
|
||||
int last1 = last0 / LAST_COUNT2;
|
||||
last0 %= LAST_COUNT2;
|
||||
|
||||
int last2 = last1 / OTHER_COUNT;
|
||||
last1 %= OTHER_COUNT;
|
||||
|
||||
int last3 = last2 / OTHER_COUNT;
|
||||
last2 %= OTHER_COUNT;
|
||||
|
||||
if (DEBUG || last3 > 0xFF-BYTES_TO_AVOID) System.out.println("4B: " + Utility.hex(cp) + " => "
|
||||
+ Utility.hex(last3) + ", "
|
||||
+ Utility.hex(last2) + ", "
|
||||
+ Utility.hex(last1) + ", "
|
||||
+ Utility.hex(last0 * LAST2_MULTIPLIER) + ", "
|
||||
);
|
||||
|
||||
return IMPLICIT_BASE_4BYTE + (last3 << 24) + (last2 << 16) + (last1 << 8) + (last0 * LAST2_MULTIPLIER);
|
||||
}
|
||||
return implicit.getImplicit(cp);
|
||||
}
|
||||
|
||||
|
||||
@ -3679,7 +3652,7 @@ static int swapCJK(int i) {
|
||||
long b2 = (newPrimary >> 8) & 0xFF;
|
||||
long b3 = newPrimary & 0xFF;
|
||||
|
||||
if (b0 < IMPLICIT_BASE_BYTE || b0 >= IMPLICIT_LIMIT_BYTE || b1 < 3 || b2 < 3 || b3 == 1 || b3 == 2) {
|
||||
if (b0 < IMPLICIT_BASE_BYTE || b0 > IMPLICIT_MAX_BYTE || b1 < 3 || b2 < 3 || b3 == 1 || b3 == 2) {
|
||||
throw new IllegalArgumentException(Utility.hex(i) + ": illegal byte value: " + Utility.hex(newPrimary)
|
||||
+ ", " + Utility.hex(b1) + ", " + Utility.hex(b2) + ", " + Utility.hex(b3));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user