ICU-6137 Move new static methods to CharsetISO2022 and make certain static methods and variables package viewable.
X-SVN-Rev: 23411
This commit is contained in:
parent
2bc412e598
commit
4daa54728a
@ -515,58 +515,58 @@ class CharsetMBCS extends CharsetICU {
|
||||
/**
|
||||
* MBCS action codes for conversions to Unicode. These values are in bits 23..20 of the state table entries.
|
||||
*/
|
||||
private static final int MBCS_STATE_VALID_DIRECT_16 = 0;
|
||||
private static final int MBCS_STATE_VALID_DIRECT_20 = MBCS_STATE_VALID_DIRECT_16 + 1;
|
||||
private static final int MBCS_STATE_FALLBACK_DIRECT_16 = MBCS_STATE_VALID_DIRECT_20 + 1;
|
||||
private static final int MBCS_STATE_FALLBACK_DIRECT_20 = MBCS_STATE_FALLBACK_DIRECT_16 + 1;
|
||||
private static final int MBCS_STATE_VALID_16 = MBCS_STATE_FALLBACK_DIRECT_20 + 1;
|
||||
private static final int MBCS_STATE_VALID_16_PAIR = MBCS_STATE_VALID_16 + 1;
|
||||
private static final int MBCS_STATE_UNASSIGNED = MBCS_STATE_VALID_16_PAIR + 1;
|
||||
private static final int MBCS_STATE_ILLEGAL = MBCS_STATE_UNASSIGNED + 1;
|
||||
private static final int MBCS_STATE_CHANGE_ONLY = MBCS_STATE_ILLEGAL + 1;
|
||||
static final int MBCS_STATE_VALID_DIRECT_16 = 0;
|
||||
static final int MBCS_STATE_VALID_DIRECT_20 = MBCS_STATE_VALID_DIRECT_16 + 1;
|
||||
static final int MBCS_STATE_FALLBACK_DIRECT_16 = MBCS_STATE_VALID_DIRECT_20 + 1;
|
||||
static final int MBCS_STATE_FALLBACK_DIRECT_20 = MBCS_STATE_FALLBACK_DIRECT_16 + 1;
|
||||
static final int MBCS_STATE_VALID_16 = MBCS_STATE_FALLBACK_DIRECT_20 + 1;
|
||||
static final int MBCS_STATE_VALID_16_PAIR = MBCS_STATE_VALID_16 + 1;
|
||||
static final int MBCS_STATE_UNASSIGNED = MBCS_STATE_VALID_16_PAIR + 1;
|
||||
static final int MBCS_STATE_ILLEGAL = MBCS_STATE_UNASSIGNED + 1;
|
||||
static final int MBCS_STATE_CHANGE_ONLY = MBCS_STATE_ILLEGAL + 1;
|
||||
|
||||
/* Methods for state table entries */
|
||||
private static int MBCS_ENTRY_TRANSITION(int state, int offset) {
|
||||
static int MBCS_ENTRY_TRANSITION(int state, int offset) {
|
||||
return (state << 24L) | offset;
|
||||
}
|
||||
|
||||
private static int MBCS_ENTRY_FINAL(int state, int action, int value) {
|
||||
static int MBCS_ENTRY_FINAL(int state, int action, int value) {
|
||||
return (int) (0x80000000 | ((int) (state) << 24L) | ((action) << 20L) | (value));
|
||||
}
|
||||
|
||||
private static boolean MBCS_ENTRY_IS_TRANSITION(int entry) {
|
||||
static boolean MBCS_ENTRY_IS_TRANSITION(int entry) {
|
||||
return (entry) >= 0;
|
||||
}
|
||||
|
||||
private static boolean MBCS_ENTRY_IS_FINAL(int entry) {
|
||||
static boolean MBCS_ENTRY_IS_FINAL(int entry) {
|
||||
return (entry) < 0;
|
||||
}
|
||||
|
||||
private static int MBCS_ENTRY_TRANSITION_STATE(int entry) {
|
||||
static int MBCS_ENTRY_TRANSITION_STATE(int entry) {
|
||||
return ((entry) >>> 24);
|
||||
}
|
||||
|
||||
private static int MBCS_ENTRY_TRANSITION_OFFSET(int entry) {
|
||||
static int MBCS_ENTRY_TRANSITION_OFFSET(int entry) {
|
||||
return ((entry) & 0xffffff);
|
||||
}
|
||||
|
||||
private static int MBCS_ENTRY_FINAL_STATE(int entry) {
|
||||
static int MBCS_ENTRY_FINAL_STATE(int entry) {
|
||||
return ((entry) >>> 24) & 0x7f;
|
||||
}
|
||||
|
||||
private static boolean MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(int entry) {
|
||||
static boolean MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(int entry) {
|
||||
return ((entry) < 0x80100000);
|
||||
}
|
||||
|
||||
private static int MBCS_ENTRY_FINAL_ACTION(int entry) {
|
||||
static int MBCS_ENTRY_FINAL_ACTION(int entry) {
|
||||
return ((entry) >>> 20) & 0xf;
|
||||
}
|
||||
|
||||
private static int MBCS_ENTRY_FINAL_VALUE(int entry) {
|
||||
static int MBCS_ENTRY_FINAL_VALUE(int entry) {
|
||||
return ((entry) & 0xfffff);
|
||||
}
|
||||
|
||||
private static char MBCS_ENTRY_FINAL_VALUE_16(int entry) {
|
||||
static char MBCS_ENTRY_FINAL_VALUE_16(int entry) {
|
||||
return (char) (entry);
|
||||
}
|
||||
|
||||
@ -579,14 +579,14 @@ class CharsetMBCS extends CharsetICU {
|
||||
}
|
||||
|
||||
/* single-byte fromUnicode: get the 16-bit result word */
|
||||
private static char MBCS_SINGLE_RESULT_FROM_U(char[] table, byte[] results, int c) {
|
||||
static char MBCS_SINGLE_RESULT_FROM_U(char[] table, byte[] results, int c) {
|
||||
int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);
|
||||
int i = 2 * (table[i1] + (c & 0xf)); // used as index into byte[] array treated as char[] array
|
||||
return (char) (((results[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (results[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK));
|
||||
}
|
||||
|
||||
/* multi-byte fromUnicode: get the 32-bit stage 2 entry */
|
||||
private static int MBCS_STAGE_2_FROM_U(char[] table, int c) {
|
||||
static int MBCS_STAGE_2_FROM_U(char[] table, int c) {
|
||||
int i = 2 * (table[(c) >>> 10] + ((c >>> 4) & 0x3f)); // 2x because used as index into char[] array treated as
|
||||
// int[] array
|
||||
return ((table[i] & UConverterConstants.UNSIGNED_SHORT_MASK) << 16)
|
||||
@ -597,7 +597,7 @@ class CharsetMBCS extends CharsetICU {
|
||||
return (((stage2Entry) & (1 << (16 + ((c) & 0xf)))) != 0);
|
||||
}
|
||||
|
||||
private static char MBCS_VALUE_2_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
|
||||
static char MBCS_VALUE_2_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
|
||||
int i = 2 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf));
|
||||
return (char) (((bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (bytes[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK));
|
||||
}
|
||||
@ -610,32 +610,32 @@ class CharsetMBCS extends CharsetICU {
|
||||
| (bytes[i + 3] & UConverterConstants.UNSIGNED_BYTE_MASK);
|
||||
}
|
||||
|
||||
private static int MBCS_POINTER_3_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
|
||||
static int MBCS_POINTER_3_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
|
||||
return ((16 * ((char) (stage2Entry) & UConverterConstants.UNSIGNED_SHORT_MASK) + ((c) & 0xf)) * 3);
|
||||
}
|
||||
|
||||
// ------------UConverterExt-------------------------------------------------------
|
||||
|
||||
private static final int EXT_INDEXES_LENGTH = 0; /* 0 */
|
||||
static final int EXT_INDEXES_LENGTH = 0; /* 0 */
|
||||
|
||||
private static final int EXT_TO_U_INDEX = EXT_INDEXES_LENGTH + 1; /* 1 */
|
||||
private static final int EXT_TO_U_LENGTH = EXT_TO_U_INDEX + 1;
|
||||
private static final int EXT_TO_U_UCHARS_INDEX = EXT_TO_U_LENGTH + 1;
|
||||
private static final int EXT_TO_U_UCHARS_LENGTH = EXT_TO_U_UCHARS_INDEX + 1;
|
||||
static final int EXT_TO_U_INDEX = EXT_INDEXES_LENGTH + 1; /* 1 */
|
||||
static final int EXT_TO_U_LENGTH = EXT_TO_U_INDEX + 1;
|
||||
static final int EXT_TO_U_UCHARS_INDEX = EXT_TO_U_LENGTH + 1;
|
||||
static final int EXT_TO_U_UCHARS_LENGTH = EXT_TO_U_UCHARS_INDEX + 1;
|
||||
|
||||
private static final int EXT_FROM_U_UCHARS_INDEX = EXT_TO_U_UCHARS_LENGTH + 1; /* 5 */
|
||||
private static final int EXT_FROM_U_VALUES_INDEX = EXT_FROM_U_UCHARS_INDEX + 1;
|
||||
private static final int EXT_FROM_U_LENGTH = EXT_FROM_U_VALUES_INDEX + 1;
|
||||
private static final int EXT_FROM_U_BYTES_INDEX = EXT_FROM_U_LENGTH + 1;
|
||||
private static final int EXT_FROM_U_BYTES_LENGTH = EXT_FROM_U_BYTES_INDEX + 1;
|
||||
static final int EXT_FROM_U_UCHARS_INDEX = EXT_TO_U_UCHARS_LENGTH + 1; /* 5 */
|
||||
static final int EXT_FROM_U_VALUES_INDEX = EXT_FROM_U_UCHARS_INDEX + 1;
|
||||
static final int EXT_FROM_U_LENGTH = EXT_FROM_U_VALUES_INDEX + 1;
|
||||
static final int EXT_FROM_U_BYTES_INDEX = EXT_FROM_U_LENGTH + 1;
|
||||
static final int EXT_FROM_U_BYTES_LENGTH = EXT_FROM_U_BYTES_INDEX + 1;
|
||||
|
||||
private static final int EXT_FROM_U_STAGE_12_INDEX = EXT_FROM_U_BYTES_LENGTH + 1; /* 10 */
|
||||
private static final int EXT_FROM_U_STAGE_1_LENGTH = EXT_FROM_U_STAGE_12_INDEX + 1;
|
||||
private static final int EXT_FROM_U_STAGE_12_LENGTH = EXT_FROM_U_STAGE_1_LENGTH + 1;
|
||||
private static final int EXT_FROM_U_STAGE_3_INDEX = EXT_FROM_U_STAGE_12_LENGTH + 1;
|
||||
private static final int EXT_FROM_U_STAGE_3_LENGTH = EXT_FROM_U_STAGE_3_INDEX + 1;
|
||||
private static final int EXT_FROM_U_STAGE_3B_INDEX = EXT_FROM_U_STAGE_3_LENGTH + 1;
|
||||
private static final int EXT_FROM_U_STAGE_3B_LENGTH = EXT_FROM_U_STAGE_3B_INDEX + 1;
|
||||
static final int EXT_FROM_U_STAGE_12_INDEX = EXT_FROM_U_BYTES_LENGTH + 1; /* 10 */
|
||||
static final int EXT_FROM_U_STAGE_1_LENGTH = EXT_FROM_U_STAGE_12_INDEX + 1;
|
||||
static final int EXT_FROM_U_STAGE_12_LENGTH = EXT_FROM_U_STAGE_1_LENGTH + 1;
|
||||
static final int EXT_FROM_U_STAGE_3_INDEX = EXT_FROM_U_STAGE_12_LENGTH + 1;
|
||||
static final int EXT_FROM_U_STAGE_3_LENGTH = EXT_FROM_U_STAGE_3_INDEX + 1;
|
||||
static final int EXT_FROM_U_STAGE_3B_INDEX = EXT_FROM_U_STAGE_3_LENGTH + 1;
|
||||
static final int EXT_FROM_U_STAGE_3B_LENGTH = EXT_FROM_U_STAGE_3B_INDEX + 1;
|
||||
|
||||
private static final int EXT_COUNT_BYTES = EXT_FROM_U_STAGE_3B_LENGTH + 1; /* 17 */
|
||||
// private static final int EXT_COUNT_UCHARS = EXT_COUNT_BYTES + 1;
|
||||
@ -646,7 +646,7 @@ class CharsetMBCS extends CharsetICU {
|
||||
// private static final int EXT_SIZE=31;
|
||||
// private static final int EXT_INDEXES_MIN_LENGTH=32;
|
||||
|
||||
private static final int EXT_FROM_U_MAX_DIRECT_LENGTH = 3;
|
||||
static final int EXT_FROM_U_MAX_DIRECT_LENGTH = 3;
|
||||
|
||||
/* toUnicode helpers -------------------------------------------------------- */
|
||||
|
||||
@ -660,29 +660,29 @@ class CharsetMBCS extends CharsetICU {
|
||||
private static final int TO_U_LENGTH_OFFSET = 12;
|
||||
|
||||
/* maximum number of indexed UChars */
|
||||
private static final int MAX_UCHARS = 19;
|
||||
static final int MAX_UCHARS = 19;
|
||||
|
||||
private static int TO_U_GET_BYTE(int word) {
|
||||
static int TO_U_GET_BYTE(int word) {
|
||||
return word >>> TO_U_BYTE_SHIFT;
|
||||
}
|
||||
|
||||
private static int TO_U_GET_VALUE(int word) {
|
||||
static int TO_U_GET_VALUE(int word) {
|
||||
return word & TO_U_VALUE_MASK;
|
||||
}
|
||||
|
||||
private static boolean TO_U_IS_ROUNDTRIP(int value) {
|
||||
static boolean TO_U_IS_ROUNDTRIP(int value) {
|
||||
return (value & TO_U_ROUNDTRIP_FLAG) != 0;
|
||||
}
|
||||
|
||||
private static boolean TO_U_IS_PARTIAL(int value) {
|
||||
static boolean TO_U_IS_PARTIAL(int value) {
|
||||
return (value & UConverterConstants.UNSIGNED_INT_MASK) < TO_U_MIN_CODE_POINT;
|
||||
}
|
||||
|
||||
private static int TO_U_GET_PARTIAL_INDEX(int value) {
|
||||
static int TO_U_GET_PARTIAL_INDEX(int value) {
|
||||
return value;
|
||||
}
|
||||
|
||||
private static int TO_U_MASK_ROUNDTRIP(int value) {
|
||||
static int TO_U_MASK_ROUNDTRIP(int value) {
|
||||
return value & ~TO_U_ROUNDTRIP_FLAG;
|
||||
}
|
||||
|
||||
@ -691,11 +691,11 @@ class CharsetMBCS extends CharsetICU {
|
||||
}
|
||||
|
||||
/* use after masking off the roundtrip flag */
|
||||
private static boolean TO_U_IS_CODE_POINT(int value) {
|
||||
static boolean TO_U_IS_CODE_POINT(int value) {
|
||||
return (value & UConverterConstants.UNSIGNED_INT_MASK) <= TO_U_MAX_CODE_POINT;
|
||||
}
|
||||
|
||||
private static int TO_U_GET_CODE_POINT(int value) {
|
||||
static int TO_U_GET_CODE_POINT(int value) {
|
||||
return (int) ((value & UConverterConstants.UNSIGNED_INT_MASK) - TO_U_MIN_CODE_POINT);
|
||||
}
|
||||
|
||||
@ -715,34 +715,34 @@ class CharsetMBCS extends CharsetICU {
|
||||
// private static final int STAGE_3_GRANULARITY = 4;
|
||||
|
||||
/* trie access, returns the stage 3 value=index to stage 3b; s1Index=c>>10 */
|
||||
private static int FROM_U(CharBuffer stage12, CharBuffer stage3, int s1Index, int c) {
|
||||
static int FROM_U(CharBuffer stage12, CharBuffer stage3, int s1Index, int c) {
|
||||
return stage3.get(((int) stage12.get((stage12.get(s1Index) + ((c >>> 4) & 0x3f))) << STAGE_2_LEFT_SHIFT)
|
||||
+ (c & 0xf));
|
||||
}
|
||||
|
||||
private static final int FROM_U_LENGTH_SHIFT = 24;
|
||||
private static final int FROM_U_ROUNDTRIP_FLAG = 1 << 31;
|
||||
private static final int FROM_U_RESERVED_MASK = 0x60000000;
|
||||
static final int FROM_U_RESERVED_MASK = 0x60000000;
|
||||
private static final int FROM_U_DATA_MASK = 0xffffff;
|
||||
|
||||
/* special value for "no mapping" to <subchar1> (impossible roundtrip to 0 bytes, value 01) */
|
||||
private static final int FROM_U_SUBCHAR1 = 0x80000001;
|
||||
static final int FROM_U_SUBCHAR1 = 0x80000001;
|
||||
|
||||
/* at most 3 bytes in the lower part of the value */
|
||||
private static final int FROM_U_MAX_DIRECT_LENGTH = 3;
|
||||
|
||||
/* maximum number of indexed bytes */
|
||||
private static final int MAX_BYTES = 0x1f;
|
||||
static final int MAX_BYTES = 0x1f;
|
||||
|
||||
private static boolean FROM_U_IS_PARTIAL(int value) {
|
||||
static boolean FROM_U_IS_PARTIAL(int value) {
|
||||
return (value >>> FROM_U_LENGTH_SHIFT) == 0;
|
||||
}
|
||||
|
||||
private static int FROM_U_GET_PARTIAL_INDEX(int value) {
|
||||
static int FROM_U_GET_PARTIAL_INDEX(int value) {
|
||||
return value;
|
||||
}
|
||||
|
||||
private static boolean FROM_U_IS_ROUNDTRIP(int value) {
|
||||
static boolean FROM_U_IS_ROUNDTRIP(int value) {
|
||||
return (value & FROM_U_ROUNDTRIP_FLAG) != 0;
|
||||
}
|
||||
|
||||
@ -751,17 +751,17 @@ class CharsetMBCS extends CharsetICU {
|
||||
}
|
||||
|
||||
/* use after masking off the roundtrip flag */
|
||||
private static int FROM_U_GET_LENGTH(int value) {
|
||||
static int FROM_U_GET_LENGTH(int value) {
|
||||
return (value >>> FROM_U_LENGTH_SHIFT) & MAX_BYTES;
|
||||
}
|
||||
|
||||
/* get bytes or bytes index */
|
||||
private static int FROM_U_GET_DATA(int value) {
|
||||
static int FROM_U_GET_DATA(int value) {
|
||||
return value & FROM_U_DATA_MASK;
|
||||
}
|
||||
|
||||
/* get the pointer to an extension array from indexes[index] */
|
||||
private static Buffer ARRAY(ByteBuffer indexes, int index, Class itemType) {
|
||||
static Buffer ARRAY(ByteBuffer indexes, int index, Class itemType) {
|
||||
int oldpos = indexes.position();
|
||||
Buffer b;
|
||||
|
||||
@ -787,7 +787,7 @@ class CharsetMBCS extends CharsetICU {
|
||||
/*
|
||||
* @return index of the UChar, if found; else <0
|
||||
*/
|
||||
private static int findFromU(CharBuffer fromUSection, int length, char u) {
|
||||
static int findFromU(CharBuffer fromUSection, int length, char u) {
|
||||
int i, start, limit;
|
||||
|
||||
/* binary search */
|
||||
@ -835,7 +835,7 @@ class CharsetMBCS extends CharsetICU {
|
||||
/*
|
||||
* @return lookup value for the byte, if found; else 0
|
||||
*/
|
||||
private static int findToU(IntBuffer toUSection, int length, short byt) {
|
||||
static int findToU(IntBuffer toUSection, int length, short byt) {
|
||||
long word0, word;
|
||||
int i, start, limit;
|
||||
|
||||
@ -912,7 +912,7 @@ class CharsetMBCS extends CharsetICU {
|
||||
/*
|
||||
* TRUE if not an SI/SO stateful converter, or if the match length fits with the current converter state
|
||||
*/
|
||||
private static boolean TO_U_VERIFY_SISO_MATCH(byte sisoState, int match) {
|
||||
static boolean TO_U_VERIFY_SISO_MATCH(byte sisoState, int match) {
|
||||
return sisoState < 0 || (sisoState == 0) == (match == 1);
|
||||
}
|
||||
|
||||
@ -927,576 +927,6 @@ class CharsetMBCS extends CharsetICU {
|
||||
: sharedData.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY ? 1 : -1;
|
||||
}
|
||||
|
||||
private static int getFallback(UConverterMBCSTable mbcsTable, int offset)
|
||||
{
|
||||
MBCSToUFallback[] toUFallbacks;
|
||||
int i, start, limit;
|
||||
|
||||
limit = mbcsTable.countToUFallbacks;
|
||||
if(limit>0) {
|
||||
/* do a binary search for the fallback mapping */
|
||||
toUFallbacks = mbcsTable.toUFallbacks;
|
||||
start = 0;
|
||||
while(start<limit-1) {
|
||||
i = (start+limit)/2;
|
||||
if(offset<toUFallbacks[i].offset) {
|
||||
limit = i;
|
||||
}
|
||||
else {
|
||||
start = i;
|
||||
}
|
||||
}
|
||||
|
||||
/* did we really find it? */
|
||||
if(offset==toUFallbacks[start].offset) {
|
||||
return toUFallbacks[start].codePoint;
|
||||
}
|
||||
}
|
||||
|
||||
return 0xfffe;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a simple version of _MBCSGetNextUChar() that is used
|
||||
* by other converter implementations.
|
||||
* It only returns an "assigned" result if it consumes the entire input.
|
||||
* It does not use state from the converter, nor error codes.
|
||||
* It does not handle the EBCDIC swaplfnl option (set in UConverter).
|
||||
* It handles conversion extensions but not GB 18030.
|
||||
*
|
||||
* Return value:
|
||||
* U+fffe unassigned
|
||||
* U+ffff illegal
|
||||
* otherwise the Unicode code point
|
||||
*/
|
||||
static int MBCSSimpleGetNextUChar(UConverterSharedData sharedData,
|
||||
ByteBuffer source,
|
||||
boolean useFallback) {
|
||||
int[][] stateTable;
|
||||
char[] unicodeCodeUnits;
|
||||
|
||||
int offset;
|
||||
int state;
|
||||
int action;
|
||||
|
||||
int c;
|
||||
int entry;
|
||||
|
||||
/* set up the local pointers */
|
||||
stateTable=sharedData.mbcs.stateTable;
|
||||
unicodeCodeUnits=sharedData.mbcs.unicodeCodeUnits;
|
||||
|
||||
/* converter state */
|
||||
offset=0;
|
||||
state=sharedData.mbcs.dbcsOnlyState;
|
||||
|
||||
/* conversion loop */
|
||||
for(;;) {
|
||||
if (source.hasRemaining() == false) {
|
||||
/* no input at all: "illegal" */
|
||||
return 0xffff;
|
||||
}
|
||||
int sourceByte = source.get() & UConverterConstants.UNSIGNED_BYTE_MASK;
|
||||
entry = stateTable[state][sourceByte];
|
||||
if (MBCS_ENTRY_IS_TRANSITION(entry)) {
|
||||
state = MBCS_ENTRY_TRANSITION_STATE(entry);
|
||||
offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
|
||||
} else {
|
||||
/*
|
||||
* An if-else-if chain provides more reliable performance for
|
||||
* the most common cases compared to a switch.
|
||||
*/
|
||||
action = MBCS_ENTRY_FINAL_ACTION(entry);
|
||||
if(action==MBCS_STATE_VALID_16) {
|
||||
offset+=MBCS_ENTRY_FINAL_VALUE_16(entry);
|
||||
c=unicodeCodeUnits[offset];
|
||||
if(c!=0xfffe) {
|
||||
/* done */
|
||||
} else if (useFallback) {
|
||||
c = getFallback(sharedData.mbcs, offset);
|
||||
/* else done with 0xfffe */
|
||||
}
|
||||
break;
|
||||
} else if(action==MBCS_STATE_VALID_DIRECT_16) {
|
||||
/* output BMP code point */
|
||||
c = MBCS_ENTRY_FINAL_VALUE_16(entry);
|
||||
break;
|
||||
} else if (action==MBCS_STATE_VALID_16_PAIR) {
|
||||
offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
|
||||
c=unicodeCodeUnits[offset++];
|
||||
if(c<0xd800) {
|
||||
/* output BMP code point below 0xd800 */
|
||||
} else if (useFallback ? c<=0xdfff : c<=0xdbff) {
|
||||
/* output roundtrip or fallback supplementary code point */
|
||||
c = (((c&0x3ff)<<10) + unicodeCodeUnits[offset] + (0x10000-0xdc00));
|
||||
} else if(useFallback ? (c&0xfffe)==0xe000 : c==0xe000) {
|
||||
/* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
|
||||
c=unicodeCodeUnits[offset];
|
||||
} else if(c==0xffff) {
|
||||
return 0xffff;
|
||||
} else {
|
||||
c=0xfffe;
|
||||
}
|
||||
break;
|
||||
} else if(action==MBCS_STATE_VALID_DIRECT_20) {
|
||||
/* output supplementary code point */
|
||||
c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
|
||||
break;
|
||||
} else if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
|
||||
if(!useFallback) {
|
||||
c=0xfffe;
|
||||
break;
|
||||
}
|
||||
/* output BMP code point */
|
||||
c=MBCS_ENTRY_FINAL_VALUE_16(entry);
|
||||
break;
|
||||
} else if(action==MBCS_STATE_FALLBACK_DIRECT_20) {
|
||||
if(!useFallback) {
|
||||
c=0xfffe;
|
||||
break;
|
||||
}
|
||||
/* output supplementary code point */
|
||||
c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry);
|
||||
break;
|
||||
} else if(action==MBCS_STATE_UNASSIGNED) {
|
||||
c=0xfffe;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* forbid MBCS_STATE_CHANGE_ONLY for this function,
|
||||
* and MBCS_STATE_ILLEGAL and reserved action codes
|
||||
*/
|
||||
c = 0xffff;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(c==0xfffe) {
|
||||
/* try an extension mapping */
|
||||
ByteBuffer cx=sharedData.mbcs.extIndexes;
|
||||
cx.position(0);
|
||||
if(cx != null) {
|
||||
source.position(0);
|
||||
return extSimpleMatchToU(cx, source, useFallback, sharedData);
|
||||
}
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
/* This private static method is use by MBCSSimpleGetNextUChar for extension mapping.*/
|
||||
private static int extSimpleMatchToU(ByteBuffer cx, ByteBuffer source, boolean useFallback, UConverterSharedData sharedData) {
|
||||
int[] value = new int[1];
|
||||
int match;
|
||||
|
||||
if (source.remaining() <= 0) {
|
||||
return 0xffff;
|
||||
}
|
||||
|
||||
/* try to match */
|
||||
match = extMatchToU(cx, (byte)-1, source, null, value,
|
||||
useFallback, true, sharedData);
|
||||
|
||||
if (match == source.array().length) {
|
||||
/* write result for simple, single-character conversion */
|
||||
if (TO_U_IS_CODE_POINT(value[0])) {
|
||||
return TO_U_GET_CODE_POINT(value[0]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* return no match because
|
||||
* - match>0 && value points to string: simple conversion cannot handle multiple code points
|
||||
* - match>0 && match!=length: not all input consumed, forbidden for this function
|
||||
* - match==0: no match found in the first place
|
||||
* - match<0: partial match, not supported for simple conversion (and flush==TRUE)
|
||||
*/
|
||||
return 0xfffe;
|
||||
}
|
||||
/* This private static method is use by extSimpleMatchToU for extension mapping. */
|
||||
private static int extMatchToU(ByteBuffer cx, byte sisoState, ByteBuffer pre, ByteBuffer src,
|
||||
int[] pMatchValue, boolean isUseFallback, boolean flush, UConverterSharedData sharedData) {
|
||||
IntBuffer toUTable, toUSection;
|
||||
|
||||
int preLength = pre.array().length;
|
||||
int value, matchValue, srcLength;
|
||||
int i, j, index, length, matchLength;
|
||||
short b;
|
||||
|
||||
if (src == null) {
|
||||
srcLength = 0;
|
||||
} else {
|
||||
srcLength = src.array().length;
|
||||
}
|
||||
|
||||
if (cx == null || cx.getInt(EXT_TO_U_LENGTH) <= 0) {
|
||||
return 0; /* no extension data, no match */
|
||||
}
|
||||
|
||||
/* initialize */
|
||||
toUTable = (IntBuffer)ARRAY(cx, EXT_TO_U_INDEX, int.class);//(IntBuffer) ARRAY(cx, EXT_TO_U_INDEX, int.class);
|
||||
index = 0;
|
||||
|
||||
matchValue = 0;
|
||||
i = j = matchLength = 0;
|
||||
|
||||
if (sisoState == 0) {
|
||||
/* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */
|
||||
if (preLength > 1) {
|
||||
return 0; /* no match of a DBCS sequence in SBCS mode */
|
||||
} else if (preLength == 1) {
|
||||
srcLength = 0;
|
||||
} else /* preLength==0 */{
|
||||
if (srcLength > 1) {
|
||||
srcLength = 1;
|
||||
}
|
||||
}
|
||||
flush = true;
|
||||
}
|
||||
|
||||
/* we must not remember fallback matches when not using fallbacks */
|
||||
|
||||
/* match input units until there is a full match or the input is consumed */
|
||||
for (;;) {
|
||||
/* go to the next section */
|
||||
int oldpos = toUTable.position();
|
||||
toUSection = ((IntBuffer) toUTable.position(index)).slice();
|
||||
toUTable.position(oldpos);
|
||||
|
||||
/* read first pair of the section */
|
||||
value = toUSection.get();
|
||||
length = TO_U_GET_BYTE(value);
|
||||
value = TO_U_GET_VALUE(value);
|
||||
if (value != 0 && (TO_U_IS_ROUNDTRIP(value)) /*|| isToUUseFallback(isUseFallback))*/
|
||||
&& TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
|
||||
/* remember longest match so far */
|
||||
matchValue = value;
|
||||
matchLength = i + j;
|
||||
}
|
||||
|
||||
/* match pre[] then src[] */
|
||||
if (i < preLength) {
|
||||
b = (short) (pre.get(i++) & UConverterConstants.UNSIGNED_BYTE_MASK);
|
||||
} else if (j < srcLength) {
|
||||
b = (short) (src.get(j++) & UConverterConstants.UNSIGNED_BYTE_MASK);
|
||||
} else {
|
||||
/* all input consumed, partial match */
|
||||
if (flush || (length = (i + j)) > MAX_BYTES) {
|
||||
/*
|
||||
* end of the entire input stream, stop with the longest match so far or: partial match must not
|
||||
* be longer than UCNV_EXT_MAX_BYTES because it must fit into state buffers
|
||||
*/
|
||||
break;
|
||||
} else {
|
||||
/* continue with more input next time */
|
||||
return -length;
|
||||
}
|
||||
}
|
||||
|
||||
/* search for the current UChar */
|
||||
value = findToU(toUSection, length, b);
|
||||
if (value == 0) {
|
||||
/* no match here, stop with the longest match so far */
|
||||
break;
|
||||
} else {
|
||||
if (TO_U_IS_PARTIAL(value)) {
|
||||
/* partial match, continue */
|
||||
index = TO_U_GET_PARTIAL_INDEX(value);
|
||||
} else {
|
||||
if (TO_U_IS_ROUNDTRIP(value) /*|| isToUUseFallback(isUseFallback)) */&& TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
|
||||
/* full match, stop with result */
|
||||
matchValue = value;
|
||||
matchLength = i + j;
|
||||
} else {
|
||||
/* full match on fallback not taken, stop with the longest match so far */
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (matchLength == 0) {
|
||||
/* no match at all */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* return result */
|
||||
pMatchValue[0] = TO_U_MASK_ROUNDTRIP(matchValue);
|
||||
return matchLength;
|
||||
}
|
||||
/*
|
||||
* This is another simple conversion function for internal use by other
|
||||
* conversion implementations.
|
||||
* It does not use the converter state nor call callbacks.
|
||||
* It does not handle the EBCDIC swaplfnl option (set in UConverter).
|
||||
* It handles conversion extensions but not GB 1830.
|
||||
*
|
||||
* It converts a single Unicode code point into code page bytes, encoded
|
||||
* as one 32-bit value. The function returns the number of bytes in *pValue:
|
||||
* 1..4 the number of bytes in *pValue
|
||||
* 0 unassigned (*pValue undefined)
|
||||
* -1 illegal (currently not used, *pValue undefined)
|
||||
* *pValue will contain the resulting bytes with the last byte in bits 7..0,
|
||||
* the second to last byte in bits 15..8, etc.
|
||||
* Currently the function assumes but does not check that 0<=c<=0x10ffff.
|
||||
*/
|
||||
static int MBCSFromUChar32_ISO2022(UConverterSharedData sharedData, int c, int[] value, boolean useFallback,
|
||||
int outputType) { // Output Type from MBCS, e.g. CharsetMBCS.MBCS_OUTPUT_2
|
||||
ByteBuffer cx;
|
||||
char[] table;
|
||||
int stage2Entry;
|
||||
int myValue;
|
||||
int length;
|
||||
int p;
|
||||
|
||||
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
|
||||
if (c<0x10000 || (sharedData.mbcs.unicodeMask& UConverterConstants.HAS_SUPPLEMENTARY) != 0) {
|
||||
table = sharedData.mbcs.fromUnicodeTable;
|
||||
stage2Entry = MBCS_STAGE_2_FROM_U(table, c);
|
||||
|
||||
/* get the bytes and the length for the output */
|
||||
if (outputType == MBCS_OUTPUT_2) {
|
||||
|
||||
myValue = MBCS_VALUE_2_FROM_STAGE_2(sharedData.mbcs.fromUnicodeBytes, stage2Entry, c);
|
||||
if (myValue <= 0xff) {
|
||||
length = 1;
|
||||
} else {
|
||||
length = 2;
|
||||
}
|
||||
} else { /* outputType == MBCS_OUTPUT_3 */
|
||||
byte[] bytes = sharedData.mbcs.fromUnicodeBytes;
|
||||
p = MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c);
|
||||
myValue = ((bytes[p] & UConverterConstants.UNSIGNED_BYTE_MASK)<<16) |
|
||||
((bytes[p+1] & UConverterConstants.UNSIGNED_BYTE_MASK)<<8) |
|
||||
(bytes[p+2] & UConverterConstants.UNSIGNED_BYTE_MASK);
|
||||
if (myValue <= 0xff) {
|
||||
length = 1;
|
||||
} else if (myValue <= 0xffff) {
|
||||
length = 2;
|
||||
} else {
|
||||
length = 3;
|
||||
}
|
||||
}
|
||||
/* is this code point assigned, or do we use fallbacks? */
|
||||
if ((stage2Entry&(1<<(16+(c&0xf)))) != 0) {
|
||||
/* assigned */
|
||||
value[0] = myValue;
|
||||
return length;
|
||||
} else if (CharsetEncoderICU.isFromUUseFallback(useFallback, c) && myValue != 0) {
|
||||
/*
|
||||
* We allow a 0 byte output if the "assigned" bit is set for this entry.
|
||||
* There is no way with this data structure for fallback output
|
||||
* to be a zero byte.
|
||||
*/
|
||||
value[0] = myValue;
|
||||
return -length;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
cx = sharedData.mbcs.extIndexes;
|
||||
if (cx != null) {
|
||||
return extSimpleMatchFromU(cx, c, value, useFallback);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* Used by ISO 2022 implementation
|
||||
* @return number of bytes in pValue; negative number if fallback; 0 for no mapping
|
||||
*/
|
||||
private static int extSimpleMatchFromU(ByteBuffer cx, int c, int[] pValue, boolean useFallback) {
|
||||
int match;
|
||||
int[] value = new int[1];
|
||||
|
||||
/*try to match */
|
||||
match = extMatchFromU(cx, c, null, null, value, useFallback, true);
|
||||
if (match >= 2) {
|
||||
int length;
|
||||
boolean isRoundtrip;
|
||||
isRoundtrip = FROM_U_IS_ROUNDTRIP(value[0]);
|
||||
length = FROM_U_GET_LENGTH(value[0]);
|
||||
value[0] = FROM_U_GET_DATA(value[0]);
|
||||
|
||||
if (length <= EXT_FROM_U_MAX_DIRECT_LENGTH) {
|
||||
pValue[0] = value[0];
|
||||
return isRoundtrip ? length : -length;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* return no match because
|
||||
* - match>1 && resultLength>4: result too long for simple conversion
|
||||
* - match==1: no match found, <subchar1> preferred
|
||||
* - match==0: no match found in the first place
|
||||
* - match<0: partial match, not supported for simple conversion (and flush==true)
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
private static int extMatchFromU(ByteBuffer cx, int firstCP, char[] pre, char[] src, int[] pMatchValue, boolean useFallback, boolean flush) {
|
||||
CharBuffer stage12, stage3;
|
||||
IntBuffer stage3b;
|
||||
|
||||
CharBuffer fromUTableUChars, fromUSectionUChars;
|
||||
IntBuffer fromUTableValues, fromUSectionValues;
|
||||
|
||||
int value, matchValue;
|
||||
int i, j, index, length, matchLength;
|
||||
char c;
|
||||
|
||||
if (cx == null) {
|
||||
return 0; /* no extension data, no match */
|
||||
}
|
||||
|
||||
/* trie lookup of firstCP */
|
||||
index = firstCP>>10; /* stage 1 index */
|
||||
if (index>=cx.getInt(EXT_FROM_U_STAGE_1_LENGTH*4)) { // need to find the correct int in the bytebuffer
|
||||
return 0; /* the first code point is outside the trie */
|
||||
}
|
||||
|
||||
stage12 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX, char.class);
|
||||
stage3 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX, char.class);
|
||||
index = FROM_U(stage12, stage3, index, firstCP);
|
||||
|
||||
stage3b = (IntBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX, int.class);
|
||||
value = stage3b.get(index);
|
||||
if (value == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Tests for (value&EXT_FROM_U_RESERVED_MASK) == 0:
|
||||
* Do not interpret values with reserved bits used, for forward compatibility,
|
||||
* and do not even remember intermediate results with reserved bits used.
|
||||
*/
|
||||
|
||||
if (TO_U_IS_PARTIAL(value)) {
|
||||
/* partial match, enter the loop below */
|
||||
index = FROM_U_GET_PARTIAL_INDEX(value);
|
||||
|
||||
/* initialize */
|
||||
fromUTableUChars = (CharBuffer)ARRAY(cx, EXT_FROM_U_UCHARS_INDEX, char.class);
|
||||
fromUTableValues = (IntBuffer)ARRAY(cx, EXT_FROM_U_VALUES_INDEX, int.class);
|
||||
|
||||
matchValue = 0;
|
||||
i = j = matchLength = 0;
|
||||
|
||||
/* we must not remember fallback matches when not using fallbacks */
|
||||
|
||||
/*match inputs until there is a full match or the input is consumed */
|
||||
for(;;) {
|
||||
/* go to the next section */
|
||||
int oldpos = fromUTableUChars.position();
|
||||
fromUSectionUChars = ((CharBuffer)fromUTableUChars.position(index)).slice();
|
||||
fromUTableUChars.position(oldpos);
|
||||
oldpos = fromUTableValues.position();
|
||||
fromUSectionValues = ((IntBuffer)fromUTableValues.position(index)).slice();
|
||||
fromUTableValues.position(oldpos);
|
||||
|
||||
/*read first pair of the section */
|
||||
length = fromUSectionUChars.get();
|
||||
value = fromUSectionValues.get();
|
||||
if (value != 0 &&
|
||||
(FROM_U_IS_ROUNDTRIP(value) || CharsetEncoderICU.isFromUUseFallback(useFallback, firstCP)) &&
|
||||
(value&FROM_U_RESERVED_MASK) == 0) {
|
||||
/* remember longest match so far */
|
||||
matchValue = value;
|
||||
matchLength = 2 + i + j;
|
||||
}
|
||||
|
||||
/* match pre[] then src[] */
|
||||
if (pre != null && i < pre.length) {
|
||||
c = pre[i++];
|
||||
} else if (src != null && j < src.length) {
|
||||
c = src[j++];
|
||||
} else {
|
||||
/* all input consumed, partial match */
|
||||
if (flush || (length=(i+j))> MAX_UCHARS) {
|
||||
/*
|
||||
* end of the entire input stream, stop with the longest match so far
|
||||
* or: partial match must not be longer than MAX_UCHARS
|
||||
* because it must fit into state buffers
|
||||
*/
|
||||
break;
|
||||
} else {
|
||||
/* continue with more input next time */
|
||||
return -(2+length);
|
||||
}
|
||||
}
|
||||
|
||||
/* search for the current UChar */
|
||||
index = findFromU(fromUSectionUChars, length, c);
|
||||
if (index < 0) {
|
||||
/* no match here, stop with the longest match so far */
|
||||
break;
|
||||
} else {
|
||||
value = fromUSectionValues.get(index);
|
||||
if (FROM_U_IS_PARTIAL(value)) {
|
||||
/* partial match, continue */
|
||||
index = FROM_U_GET_PARTIAL_INDEX(value);
|
||||
} else {
|
||||
if ((FROM_U_IS_ROUNDTRIP(value) || CharsetEncoderICU.isFromUUseFallback(useFallback, firstCP)) &&
|
||||
(value&FROM_U_RESERVED_MASK) == 0 ) {
|
||||
/* full match, stop with result */
|
||||
matchValue = value;
|
||||
matchLength = 2 + i + j;
|
||||
} else {
|
||||
/* full match on fallback not taken, stop with the longest match so far */
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (matchLength == 0) {
|
||||
/* no match at all */
|
||||
return 0;
|
||||
}
|
||||
} else { /* result from firstCP trie lookup */
|
||||
if ((FROM_U_IS_ROUNDTRIP(value) || CharsetEncoderICU.isFromUUseFallback(useFallback, firstCP)) &&
|
||||
(value&FROM_U_RESERVED_MASK) == 0) {
|
||||
/* full match, stop with result */
|
||||
matchValue = value;
|
||||
matchLength = 2;
|
||||
} else {
|
||||
/* fallback not taken */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* return result */
|
||||
if (matchValue == FROM_U_SUBCHAR1) {
|
||||
return 1; /* assert matchLength == 2 */
|
||||
}
|
||||
pMatchValue[0] = matchValue;
|
||||
return matchLength;
|
||||
}
|
||||
/*
|
||||
* @param is the the output byte
|
||||
* @return 1 roundtrip byte 0 no mapping -1 fallback byte
|
||||
*/
|
||||
static int MBCSSingleFromUChar32(UConverterSharedData sharedData, int c, int[] retval, boolean useFallback) {
|
||||
char[] table;
|
||||
int value;
|
||||
/* BMP-only codepages are stored without stage 1 entries for supplementary code points */
|
||||
if (c >= 0x10000 && (sharedData.mbcs.unicodeMask&UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
|
||||
return 0;
|
||||
}
|
||||
/* convert the Unicode code point in c into codepage bytes */
|
||||
table = sharedData.mbcs.fromUnicodeTable;
|
||||
/* get the byte for the output */
|
||||
value = MBCS_SINGLE_RESULT_FROM_U(table, sharedData.mbcs.fromUnicodeBytes, c);
|
||||
/* get the byte for the output */
|
||||
retval[0] = value & 0xff;
|
||||
if (value >= 0xf00) {
|
||||
return 1; /* roundtrip */
|
||||
} else if (useFallback ? value>=0x800 : value>=0xc00) {
|
||||
return -1; /* fallback taken */
|
||||
} else {
|
||||
return 0; /* no mapping */
|
||||
}
|
||||
}
|
||||
|
||||
class CharsetDecoderMBCS extends CharsetDecoderICU {
|
||||
|
||||
CharsetDecoderMBCS(CharsetICU cs) {
|
||||
|
Loading…
Reference in New Issue
Block a user