ICU-6137 Move new static methods to CharsetISO2022 and make certain static methods and variables package viewable.

X-SVN-Rev: 23411
This commit is contained in:
Michael Ow 2008-02-11 23:24:36 +00:00
parent 2bc412e598
commit 4daa54728a

View File

@ -515,58 +515,58 @@ class CharsetMBCS extends CharsetICU {
/**
* MBCS action codes for conversions to Unicode. These values are in bits 23..20 of the state table entries.
* They are numbered consecutively, each defined as its predecessor plus one:
* 0 VALID_DIRECT_16, 1 VALID_DIRECT_20, 2 FALLBACK_DIRECT_16, 3 FALLBACK_DIRECT_20, 4 VALID_16,
* 5 VALID_16_PAIR, 6 UNASSIGNED, 7 ILLEGAL, 8 CHANGE_ONLY.
*
* How MBCSSimpleGetNextUChar treats them:
* - VALID_DIRECT_16 / FALLBACK_DIRECT_16: the low 16 bits of the entry are the BMP code point
*   (the FALLBACK variant only when fallbacks are enabled, otherwise U+fffe).
* - VALID_DIRECT_20 / FALLBACK_DIRECT_20: 0x10000 plus the 20-bit entry value is the code point
*   (the FALLBACK variant only when fallbacks are enabled, otherwise U+fffe).
* - VALID_16 / VALID_16_PAIR: the 16-bit entry value is added to the accumulated offset, which then
*   indexes the unicodeCodeUnits array (one unit, or a pair of units respectively).
* - UNASSIGNED: result is U+fffe.
* - ILLEGAL, CHANGE_ONLY and any other code: result is U+ffff.
*/
private static final int MBCS_STATE_VALID_DIRECT_16 = 0;
private static final int MBCS_STATE_VALID_DIRECT_20 = MBCS_STATE_VALID_DIRECT_16 + 1;
private static final int MBCS_STATE_FALLBACK_DIRECT_16 = MBCS_STATE_VALID_DIRECT_20 + 1;
private static final int MBCS_STATE_FALLBACK_DIRECT_20 = MBCS_STATE_FALLBACK_DIRECT_16 + 1;
private static final int MBCS_STATE_VALID_16 = MBCS_STATE_FALLBACK_DIRECT_20 + 1;
private static final int MBCS_STATE_VALID_16_PAIR = MBCS_STATE_VALID_16 + 1;
private static final int MBCS_STATE_UNASSIGNED = MBCS_STATE_VALID_16_PAIR + 1;
private static final int MBCS_STATE_ILLEGAL = MBCS_STATE_UNASSIGNED + 1;
private static final int MBCS_STATE_CHANGE_ONLY = MBCS_STATE_ILLEGAL + 1;
/* The same nine action codes, declared without "private" (package-visible). */
static final int MBCS_STATE_VALID_DIRECT_16 = 0;
static final int MBCS_STATE_VALID_DIRECT_20 = MBCS_STATE_VALID_DIRECT_16 + 1;
static final int MBCS_STATE_FALLBACK_DIRECT_16 = MBCS_STATE_VALID_DIRECT_20 + 1;
static final int MBCS_STATE_FALLBACK_DIRECT_20 = MBCS_STATE_FALLBACK_DIRECT_16 + 1;
static final int MBCS_STATE_VALID_16 = MBCS_STATE_FALLBACK_DIRECT_20 + 1;
static final int MBCS_STATE_VALID_16_PAIR = MBCS_STATE_VALID_16 + 1;
static final int MBCS_STATE_UNASSIGNED = MBCS_STATE_VALID_16_PAIR + 1;
static final int MBCS_STATE_ILLEGAL = MBCS_STATE_UNASSIGNED + 1;
static final int MBCS_STATE_CHANGE_ONLY = MBCS_STATE_ILLEGAL + 1;
/*
 * Methods for state table entries.
 *
 * A state table entry is a 32-bit int. Bit 31 distinguishes the two kinds:
 *   transition entry (bit 31 clear): next state above bit 23, offset in bits 23..0
 *   final entry      (bit 31 set):   state in bits 30..24, action in bits 23..20, value in bits 19..0
 * Each helper below is shown twice: first the former "private" signature, then the package-visible one.
 */
/* Packs a transition entry: state shifted left by 24, OR-ed with offset. */
private static int MBCS_ENTRY_TRANSITION(int state, int offset) {
static int MBCS_ENTRY_TRANSITION(int state, int offset) {
return (state << 24L) | offset;
}
/* Packs a final entry: sets bit 31 and ORs in state << 24, action << 20 and value. */
private static int MBCS_ENTRY_FINAL(int state, int action, int value) {
static int MBCS_ENTRY_FINAL(int state, int action, int value) {
return (int) (0x80000000 | ((int) (state) << 24L) | ((action) << 20L) | (value));
}
/* True when bit 31 is clear, i.e. the entry is non-negative as a signed int. */
private static boolean MBCS_ENTRY_IS_TRANSITION(int entry) {
static boolean MBCS_ENTRY_IS_TRANSITION(int entry) {
return (entry) >= 0;
}
/* True when bit 31 is set, i.e. the entry is negative as a signed int. */
private static boolean MBCS_ENTRY_IS_FINAL(int entry) {
static boolean MBCS_ENTRY_IS_FINAL(int entry) {
return (entry) < 0;
}
/* Next state of a transition entry: the top 8 bits (unsigned shift). */
private static int MBCS_ENTRY_TRANSITION_STATE(int entry) {
static int MBCS_ENTRY_TRANSITION_STATE(int entry) {
return ((entry) >>> 24);
}
/* Offset of a transition entry: the low 24 bits. */
private static int MBCS_ENTRY_TRANSITION_OFFSET(int entry) {
static int MBCS_ENTRY_TRANSITION_OFFSET(int entry) {
return ((entry) & 0xffffff);
}
/* State of a final entry: bits 30..24 (bit 31, the "final" flag, is masked off). */
private static int MBCS_ENTRY_FINAL_STATE(int entry) {
static int MBCS_ENTRY_FINAL_STATE(int entry) {
return ((entry) >>> 24) & 0x7f;
}
/*
 * Signed comparison against 0x80100000: true exactly for bit patterns 0x80000000..0x800fffff,
 * i.e. final entries whose bits 30..20 (state and action) are all zero.
 */
private static boolean MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(int entry) {
static boolean MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(int entry) {
return ((entry) < 0x80100000);
}
/* Action code of a final entry: bits 23..20 (one of the MBCS_STATE_* values). */
private static int MBCS_ENTRY_FINAL_ACTION(int entry) {
static int MBCS_ENTRY_FINAL_ACTION(int entry) {
return ((entry) >>> 20) & 0xf;
}
/* 20-bit value of a final entry: bits 19..0. */
private static int MBCS_ENTRY_FINAL_VALUE(int entry) {
static int MBCS_ENTRY_FINAL_VALUE(int entry) {
return ((entry) & 0xfffff);
}
/* 16-bit value of a final entry: the low 16 bits, obtained by narrowing to char. */
private static char MBCS_ENTRY_FINAL_VALUE_16(int entry) {
static char MBCS_ENTRY_FINAL_VALUE_16(int entry) {
return (char) (entry);
}
@ -579,14 +579,14 @@ class CharsetMBCS extends CharsetICU {
}
/*
 * single-byte fromUnicode: get the 16-bit result word
 *
 * Three-level lookup for code point c:
 *   stage 1: table[c >>> 10] gives the base of a stage 2 block
 *   stage 2: that base plus bits 9..4 of c selects a table entry holding the base of a result block
 *   stage 3: that base plus bits 3..0 of c is the index of a 16-bit result
 * The results are stored in a byte[] as big-endian byte pairs, hence the factor 2 and the
 * (high << 8) | low reassembly with unsigned byte masking.
 */
private static char MBCS_SINGLE_RESULT_FROM_U(char[] table, byte[] results, int c) {
static char MBCS_SINGLE_RESULT_FROM_U(char[] table, byte[] results, int c) {
int i1 = table[c >>> 10] + ((c >>> 4) & 0x3f);
int i = 2 * (table[i1] + (c & 0xf)); // used as index into byte[] array treated as char[] array
return (char) (((results[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (results[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK));
}
/* multi-byte fromUnicode: get the 32-bit stage 2 entry */
private static int MBCS_STAGE_2_FROM_U(char[] table, int c) {
static int MBCS_STAGE_2_FROM_U(char[] table, int c) {
int i = 2 * (table[(c) >>> 10] + ((c >>> 4) & 0x3f)); // 2x because used as index into char[] array treated as
// int[] array
return ((table[i] & UConverterConstants.UNSIGNED_SHORT_MASK) << 16)
@ -597,7 +597,7 @@ class CharsetMBCS extends CharsetICU {
return (((stage2Entry) & (1 << (16 + ((c) & 0xf)))) != 0);
}
/*
 * Reads the 16-bit (2-byte output) result for code point c from the result bytes.
 * The low 16 bits of stage2Entry select a block of 16 results; bits 3..0 of c select the
 * result within it. Results are big-endian byte pairs, so the byte index is twice the result index.
 */
private static char MBCS_VALUE_2_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
static char MBCS_VALUE_2_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
int i = 2 * (16 * ((char) stage2Entry & UConverterConstants.UNSIGNED_SHORT_MASK) + (c & 0xf));
return (char) (((bytes[i] & UConverterConstants.UNSIGNED_BYTE_MASK) << 8) | (bytes[i + 1] & UConverterConstants.UNSIGNED_BYTE_MASK));
}
@ -610,32 +610,32 @@ class CharsetMBCS extends CharsetICU {
| (bytes[i + 3] & UConverterConstants.UNSIGNED_BYTE_MASK);
}
/*
 * Computes the byte offset of the 3-byte result for code point c: the low 16 bits of stage2Entry
 * select a block of 16 results, bits 3..0 of c select the result, and each result is 3 bytes wide.
 * The bytes parameter is not read here; the caller indexes it with the returned offset
 * (see MBCSFromUChar32_ISO2022, which reads bytes[p], bytes[p+1], bytes[p+2]).
 */
private static int MBCS_POINTER_3_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
static int MBCS_POINTER_3_FROM_STAGE_2(byte[] bytes, int stage2Entry, int c) {
return ((16 * ((char) (stage2Entry) & UConverterConstants.UNSIGNED_SHORT_MASK) + ((c) & 0xf)) * 3);
}
// ------------UConverterExt-------------------------------------------------------
/*
 * Slot numbers within the extension "indexes" array, numbered consecutively from 0
 * (EXT_INDEXES_LENGTH) to 17 (EXT_COUNT_BYTES); the trailing comments give the running value at
 * the start of each group. extMatchFromU reads a slot with cx.getInt(slot * 4), i.e. each slot is
 * a 4-byte int inside the ByteBuffer. Each group appears twice: first with the former "private"
 * modifier, then package-visible.
 */
private static final int EXT_INDEXES_LENGTH = 0; /* 0 */
static final int EXT_INDEXES_LENGTH = 0; /* 0 */
private static final int EXT_TO_U_INDEX = EXT_INDEXES_LENGTH + 1; /* 1 */
private static final int EXT_TO_U_LENGTH = EXT_TO_U_INDEX + 1;
private static final int EXT_TO_U_UCHARS_INDEX = EXT_TO_U_LENGTH + 1;
private static final int EXT_TO_U_UCHARS_LENGTH = EXT_TO_U_UCHARS_INDEX + 1;
static final int EXT_TO_U_INDEX = EXT_INDEXES_LENGTH + 1; /* 1 */
static final int EXT_TO_U_LENGTH = EXT_TO_U_INDEX + 1;
static final int EXT_TO_U_UCHARS_INDEX = EXT_TO_U_LENGTH + 1;
static final int EXT_TO_U_UCHARS_LENGTH = EXT_TO_U_UCHARS_INDEX + 1;
private static final int EXT_FROM_U_UCHARS_INDEX = EXT_TO_U_UCHARS_LENGTH + 1; /* 5 */
private static final int EXT_FROM_U_VALUES_INDEX = EXT_FROM_U_UCHARS_INDEX + 1;
private static final int EXT_FROM_U_LENGTH = EXT_FROM_U_VALUES_INDEX + 1;
private static final int EXT_FROM_U_BYTES_INDEX = EXT_FROM_U_LENGTH + 1;
private static final int EXT_FROM_U_BYTES_LENGTH = EXT_FROM_U_BYTES_INDEX + 1;
static final int EXT_FROM_U_UCHARS_INDEX = EXT_TO_U_UCHARS_LENGTH + 1; /* 5 */
static final int EXT_FROM_U_VALUES_INDEX = EXT_FROM_U_UCHARS_INDEX + 1;
static final int EXT_FROM_U_LENGTH = EXT_FROM_U_VALUES_INDEX + 1;
static final int EXT_FROM_U_BYTES_INDEX = EXT_FROM_U_LENGTH + 1;
static final int EXT_FROM_U_BYTES_LENGTH = EXT_FROM_U_BYTES_INDEX + 1;
private static final int EXT_FROM_U_STAGE_12_INDEX = EXT_FROM_U_BYTES_LENGTH + 1; /* 10 */
private static final int EXT_FROM_U_STAGE_1_LENGTH = EXT_FROM_U_STAGE_12_INDEX + 1;
private static final int EXT_FROM_U_STAGE_12_LENGTH = EXT_FROM_U_STAGE_1_LENGTH + 1;
private static final int EXT_FROM_U_STAGE_3_INDEX = EXT_FROM_U_STAGE_12_LENGTH + 1;
private static final int EXT_FROM_U_STAGE_3_LENGTH = EXT_FROM_U_STAGE_3_INDEX + 1;
private static final int EXT_FROM_U_STAGE_3B_INDEX = EXT_FROM_U_STAGE_3_LENGTH + 1;
private static final int EXT_FROM_U_STAGE_3B_LENGTH = EXT_FROM_U_STAGE_3B_INDEX + 1;
static final int EXT_FROM_U_STAGE_12_INDEX = EXT_FROM_U_BYTES_LENGTH + 1; /* 10 */
static final int EXT_FROM_U_STAGE_1_LENGTH = EXT_FROM_U_STAGE_12_INDEX + 1;
static final int EXT_FROM_U_STAGE_12_LENGTH = EXT_FROM_U_STAGE_1_LENGTH + 1;
static final int EXT_FROM_U_STAGE_3_INDEX = EXT_FROM_U_STAGE_12_LENGTH + 1;
static final int EXT_FROM_U_STAGE_3_LENGTH = EXT_FROM_U_STAGE_3_INDEX + 1;
static final int EXT_FROM_U_STAGE_3B_INDEX = EXT_FROM_U_STAGE_3_LENGTH + 1;
static final int EXT_FROM_U_STAGE_3B_LENGTH = EXT_FROM_U_STAGE_3B_INDEX + 1;
private static final int EXT_COUNT_BYTES = EXT_FROM_U_STAGE_3B_LENGTH + 1; /* 17 */
// private static final int EXT_COUNT_UCHARS = EXT_COUNT_BYTES + 1;
@ -646,7 +646,7 @@ class CharsetMBCS extends CharsetICU {
// private static final int EXT_SIZE=31;
// private static final int EXT_INDEXES_MIN_LENGTH=32;
/*
 * Longest fromUnicode result, in bytes, that extSimpleMatchFromU hands back directly in the value;
 * longer results make it report "no mapping" (0).
 */
private static final int EXT_FROM_U_MAX_DIRECT_LENGTH = 3;
static final int EXT_FROM_U_MAX_DIRECT_LENGTH = 3;
/* toUnicode helpers -------------------------------------------------------- */
@ -660,29 +660,29 @@ class CharsetMBCS extends CharsetICU {
/* NOTE(review): TO_U_LENGTH_OFFSET is not used anywhere in the visible code; its meaning cannot be confirmed from here. */
private static final int TO_U_LENGTH_OFFSET = 12;
/* maximum number of indexed UChars */
/* extMatchFromU refuses to continue a partial match once more than MAX_UCHARS units have been consumed. */
private static final int MAX_UCHARS = 19;
static final int MAX_UCHARS = 19;
/*
 * Upper field of a toUnicode section word (unsigned shift by TO_U_BYTE_SHIFT).
 * extMatchToU uses this field of a section's first word as the section length.
 */
private static int TO_U_GET_BYTE(int word) {
static int TO_U_GET_BYTE(int word) {
return word >>> TO_U_BYTE_SHIFT;
}
/* Lower field of a toUnicode section word (masked with TO_U_VALUE_MASK). */
private static int TO_U_GET_VALUE(int word) {
static int TO_U_GET_VALUE(int word) {
return word & TO_U_VALUE_MASK;
}
/* True when the roundtrip flag bit is set in the value. */
private static boolean TO_U_IS_ROUNDTRIP(int value) {
static boolean TO_U_IS_ROUNDTRIP(int value) {
return (value & TO_U_ROUNDTRIP_FLAG) != 0;
}
/* True when the value, read as an unsigned 32-bit number, is below TO_U_MIN_CODE_POINT. */
private static boolean TO_U_IS_PARTIAL(int value) {
static boolean TO_U_IS_PARTIAL(int value) {
return (value & UConverterConstants.UNSIGNED_INT_MASK) < TO_U_MIN_CODE_POINT;
}
/* A partial-match value is itself the index of the next section; returned unchanged. */
private static int TO_U_GET_PARTIAL_INDEX(int value) {
static int TO_U_GET_PARTIAL_INDEX(int value) {
return value;
}
/* Clears the roundtrip flag bit, leaving the remaining bits of the value. */
private static int TO_U_MASK_ROUNDTRIP(int value) {
static int TO_U_MASK_ROUNDTRIP(int value) {
return value & ~TO_U_ROUNDTRIP_FLAG;
}
@ -691,11 +691,11 @@ class CharsetMBCS extends CharsetICU {
}
/* use after masking off the roundtrip flag */
/* True when the value, read as an unsigned 32-bit number, does not exceed TO_U_MAX_CODE_POINT. */
private static boolean TO_U_IS_CODE_POINT(int value) {
static boolean TO_U_IS_CODE_POINT(int value) {
return (value & UConverterConstants.UNSIGNED_INT_MASK) <= TO_U_MAX_CODE_POINT;
}
/* Recovers the code point by subtracting the TO_U_MIN_CODE_POINT bias from the unsigned value. */
private static int TO_U_GET_CODE_POINT(int value) {
static int TO_U_GET_CODE_POINT(int value) {
return (int) ((value & UConverterConstants.UNSIGNED_INT_MASK) - TO_U_MIN_CODE_POINT);
}
@ -715,34 +715,34 @@ class CharsetMBCS extends CharsetICU {
// private static final int STAGE_3_GRANULARITY = 4;
/* trie access, returns the stage 3 value=index to stage 3b; s1Index=c>>10 */
/*
 * Lookup steps (all absolute reads, buffer positions are not moved):
 *   stage12.get(s1Index)                      -> base of the stage 2 block
 *   base + bits 9..4 of c, read from stage12  -> stage 2 entry
 *   (entry << STAGE_2_LEFT_SHIFT) + bits 3..0 of c, read from stage3 -> result
 */
private static int FROM_U(CharBuffer stage12, CharBuffer stage3, int s1Index, int c) {
static int FROM_U(CharBuffer stage12, CharBuffer stage3, int s1Index, int c) {
return stage3.get(((int) stage12.get((stage12.get(s1Index) + ((c >>> 4) & 0x3f))) << STAGE_2_LEFT_SHIFT)
+ (c & 0xf));
}
/*
 * Layout of a fromUnicode value, as used by the helpers below:
 *   bit 31      roundtrip flag (FROM_U_ROUNDTRIP_FLAG)
 *   bits 30..29 reserved (FROM_U_RESERVED_MASK); values with these bits set are not interpreted
 *   bits 28..24 length (shifted by FROM_U_LENGTH_SHIFT, masked with MAX_BYTES)
 *   bits 23..0  data (FROM_U_DATA_MASK)
 */
private static final int FROM_U_LENGTH_SHIFT = 24;
private static final int FROM_U_ROUNDTRIP_FLAG = 1 << 31;
private static final int FROM_U_RESERVED_MASK = 0x60000000;
static final int FROM_U_RESERVED_MASK = 0x60000000;
private static final int FROM_U_DATA_MASK = 0xffffff;
/* special value for "no mapping" to <subchar1> (impossible roundtrip to 0 bytes, value 01) */
private static final int FROM_U_SUBCHAR1 = 0x80000001;
static final int FROM_U_SUBCHAR1 = 0x80000001;
/* at most 3 bytes in the lower part of the value */
private static final int FROM_U_MAX_DIRECT_LENGTH = 3;
/* maximum number of indexed bytes */
private static final int MAX_BYTES = 0x1f;
static final int MAX_BYTES = 0x1f;
/* True when the top 8 bits (flag, reserved and length fields) are all zero. */
private static boolean FROM_U_IS_PARTIAL(int value) {
static boolean FROM_U_IS_PARTIAL(int value) {
return (value >>> FROM_U_LENGTH_SHIFT) == 0;
}
/* A partial-match value is itself the index of the next section; returned unchanged. */
private static int FROM_U_GET_PARTIAL_INDEX(int value) {
static int FROM_U_GET_PARTIAL_INDEX(int value) {
return value;
}
/* True when bit 31 (the roundtrip flag) is set. */
private static boolean FROM_U_IS_ROUNDTRIP(int value) {
static boolean FROM_U_IS_ROUNDTRIP(int value) {
return (value & FROM_U_ROUNDTRIP_FLAG) != 0;
}
@ -751,17 +751,17 @@ class CharsetMBCS extends CharsetICU {
}
/* use after masking off the roundtrip flag */
/* Length field of a fromUnicode value: bits 28..24 (shift by FROM_U_LENGTH_SHIFT, mask with MAX_BYTES). */
private static int FROM_U_GET_LENGTH(int value) {
static int FROM_U_GET_LENGTH(int value) {
return (value >>> FROM_U_LENGTH_SHIFT) & MAX_BYTES;
}
/* get bytes or bytes index */
/* Data field of a fromUnicode value: the bits selected by FROM_U_DATA_MASK. */
private static int FROM_U_GET_DATA(int value) {
static int FROM_U_GET_DATA(int value) {
return value & FROM_U_DATA_MASK;
}
/* get the pointer to an extension array from indexes[index] */
private static Buffer ARRAY(ByteBuffer indexes, int index, Class itemType) {
static Buffer ARRAY(ByteBuffer indexes, int index, Class itemType) {
int oldpos = indexes.position();
Buffer b;
@ -787,7 +787,7 @@ class CharsetMBCS extends CharsetICU {
/*
* @return index of the UChar, if found; else <0
*/
private static int findFromU(CharBuffer fromUSection, int length, char u) {
static int findFromU(CharBuffer fromUSection, int length, char u) {
int i, start, limit;
/* binary search */
@ -835,7 +835,7 @@ class CharsetMBCS extends CharsetICU {
/*
* @return lookup value for the byte, if found; else 0
*/
private static int findToU(IntBuffer toUSection, int length, short byt) {
static int findToU(IntBuffer toUSection, int length, short byt) {
long word0, word;
int i, start, limit;
@ -912,7 +912,7 @@ class CharsetMBCS extends CharsetICU {
/*
 * TRUE if not an SI/SO stateful converter, or if the match length fits with the current converter state
 *
 * sisoState < 0  : always true (callers pass -1 for "not SI/SO stateful").
 * sisoState == 0 : true only when match == 1.
 * sisoState > 0  : true only when match != 1.
 */
private static boolean TO_U_VERIFY_SISO_MATCH(byte sisoState, int match) {
static boolean TO_U_VERIFY_SISO_MATCH(byte sisoState, int match) {
return sisoState < 0 || (sisoState == 0) == (match == 1);
}
@ -927,576 +927,6 @@ class CharsetMBCS extends CharsetICU {
: sharedData.mbcs.outputType == MBCS_OUTPUT_DBCS_ONLY ? 1 : -1;
}
/*
 * Finds the toUnicode fallback code point registered for the given offset.
 *
 * The first countToUFallbacks elements of mbcsTable.toUFallbacks are searched with a binary
 * search that relies on them being ordered by ascending offset.
 *
 * @param mbcsTable table holding the fallback array and its element count
 * @param offset    offset to look up
 * @return the fallback code point, or 0xfffe when there are no fallbacks or none matches
 */
private static int getFallback(UConverterMBCSTable mbcsTable, int offset)
{
    int high = mbcsTable.countToUFallbacks;
    if (high <= 0) {
        /* no fallback mappings at all */
        return 0xfffe;
    }

    MBCSToUFallback[] fallbacks = mbcsTable.toUFallbacks;
    int low = 0;

    /* narrow [low, high) down to the single candidate at index low */
    while (low < high - 1) {
        int mid = (low + high) / 2;
        if (offset < fallbacks[mid].offset) {
            high = mid;
        } else {
            low = mid;
        }
    }

    /* the candidate counts only if its offset matches exactly */
    if (offset == fallbacks[low].offset) {
        return fallbacks[low].codePoint;
    }
    return 0xfffe;
}
/*
 * This is a simple version of _MBCSGetNextUChar() that is used
 * by other converter implementations.
 * It only returns an "assigned" result if it consumes the entire input.
 * It does not use state from the converter, nor error codes.
 * It does not handle the EBCDIC swaplfnl option (set in UConverter).
 * It handles conversion extensions but not GB 18030.
 *
 * Bytes are read from source (advancing its position) and fed through the state table,
 * starting in sharedData.mbcs.dbcsOnlyState, until a final entry is reached. If the state
 * table yields "unassigned" and the converter has extension data, source is rewound to
 * position 0 and the extension table is consulted instead.
 *
 * @param sharedData  converter data; its mbcs.stateTable, mbcs.unicodeCodeUnits,
 *                    mbcs.dbcsOnlyState and mbcs.extIndexes (may be null) are read
 * @param source      input bytes for exactly one character
 * @param useFallback whether fallback mappings may be returned
 * @return U+fffe unassigned,
 *         U+ffff illegal (including empty or truncated input),
 *         otherwise the Unicode code point
 */
static int MBCSSimpleGetNextUChar(UConverterSharedData sharedData,
        ByteBuffer source,
        boolean useFallback) {
    int[][] stateTable;
    char[] unicodeCodeUnits;
    int offset;
    int state;
    int action;
    int c;
    int entry;

    /* set up the local pointers */
    stateTable = sharedData.mbcs.stateTable;
    unicodeCodeUnits = sharedData.mbcs.unicodeCodeUnits;

    /* converter state */
    offset = 0;
    state = sharedData.mbcs.dbcsOnlyState;

    /* conversion loop */
    for (;;) {
        if (!source.hasRemaining()) {
            /* no input at all, or input ended before a final entry was reached: "illegal" */
            return 0xffff;
        }
        int sourceByte = source.get() & UConverterConstants.UNSIGNED_BYTE_MASK;
        entry = stateTable[state][sourceByte];
        if (MBCS_ENTRY_IS_TRANSITION(entry)) {
            state = MBCS_ENTRY_TRANSITION_STATE(entry);
            offset += MBCS_ENTRY_TRANSITION_OFFSET(entry);
        } else {
            /*
             * An if-else-if chain provides more reliable performance for
             * the most common cases compared to a switch.
             */
            action = MBCS_ENTRY_FINAL_ACTION(entry);
            if (action == MBCS_STATE_VALID_16) {
                offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
                c = unicodeCodeUnits[offset];
                if (c != 0xfffe) {
                    /* done */
                } else if (useFallback) {
                    c = getFallback(sharedData.mbcs, offset);
                    /* else done with 0xfffe */
                }
                break;
            } else if (action == MBCS_STATE_VALID_DIRECT_16) {
                /* output BMP code point */
                c = MBCS_ENTRY_FINAL_VALUE_16(entry);
                break;
            } else if (action == MBCS_STATE_VALID_16_PAIR) {
                offset += MBCS_ENTRY_FINAL_VALUE_16(entry);
                c = unicodeCodeUnits[offset++];
                if (c < 0xd800) {
                    /* output BMP code point below 0xd800 */
                } else if (useFallback ? c <= 0xdfff : c <= 0xdbff) {
                    /* output roundtrip or fallback supplementary code point */
                    c = (((c & 0x3ff) << 10) + unicodeCodeUnits[offset] + (0x10000 - 0xdc00));
                } else if (useFallback ? (c & 0xfffe) == 0xe000 : c == 0xe000) {
                    /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */
                    c = unicodeCodeUnits[offset];
                } else if (c == 0xffff) {
                    return 0xffff;
                } else {
                    c = 0xfffe;
                }
                break;
            } else if (action == MBCS_STATE_VALID_DIRECT_20) {
                /* output supplementary code point */
                c = 0x10000 + MBCS_ENTRY_FINAL_VALUE(entry);
                break;
            } else if (action == MBCS_STATE_FALLBACK_DIRECT_16) {
                if (!useFallback) {
                    c = 0xfffe;
                    break;
                }
                /* output BMP code point */
                c = MBCS_ENTRY_FINAL_VALUE_16(entry);
                break;
            } else if (action == MBCS_STATE_FALLBACK_DIRECT_20) {
                if (!useFallback) {
                    c = 0xfffe;
                    break;
                }
                /* output supplementary code point */
                c = 0x10000 + MBCS_ENTRY_FINAL_VALUE(entry);
                break;
            } else if (action == MBCS_STATE_UNASSIGNED) {
                c = 0xfffe;
                break;
            }
            /*
             * forbid MBCS_STATE_CHANGE_ONLY for this function,
             * and MBCS_STATE_ILLEGAL and reserved action codes
             */
            c = 0xffff;
            break;
        }
    }

    if (c == 0xfffe) {
        /* try an extension mapping */
        ByteBuffer cx = sharedData.mbcs.extIndexes;
        /*
         * Test for null BEFORE touching the buffer: converters without extension data
         * have no extIndexes, and must simply report "unassigned".
         */
        if (cx != null) {
            cx.position(0);
            /* the extension matcher re-reads the character from the start of source */
            source.position(0);
            return extSimpleMatchToU(cx, source, useFallback, sharedData);
        }
    }
    return c;
}
/*
 * Extension-table lookup used by MBCSSimpleGetNextUChar.
 *
 * Runs extMatchToU over source as a non-SI/SO converter (sisoState -1) with flush set, and
 * accepts the result only when the reported match length equals the length of the array
 * backing source and the matched value is a single code point.
 * NOTE(review): the comparison uses source.array().length, so source is presumably expected
 * to wrap exactly the bytes of one character -- confirm against callers.
 *
 * @return U+ffff if source has nothing remaining;
 *         the code point on a complete single-code-point match;
 *         U+fffe otherwise, i.e. when
 *         - match>0 but the value refers to a string (multiple code points),
 *         - match>0 but not all input was consumed,
 *         - match==0 (nothing found), or
 *         - match<0 (partial match, unsupported for simple conversion with flush==TRUE)
 */
private static int extSimpleMatchToU(ByteBuffer cx, ByteBuffer source, boolean useFallback, UConverterSharedData sharedData) {
    if (!source.hasRemaining()) {
        return 0xffff;
    }

    /* single-element array acts as the out-parameter for the matched value */
    int[] matched = new int[1];
    int matchLength = extMatchToU(cx, (byte) -1, source, null, matched,
            useFallback, true, sharedData);

    /* simple, single-character conversion: whole input consumed and one code point produced */
    if (matchLength == source.array().length && TO_U_IS_CODE_POINT(matched[0])) {
        return TO_U_GET_CODE_POINT(matched[0]);
    }
    return 0xfffe;
}
/*
 * Matches input bytes against the toUnicode extension table. Used by extSimpleMatchToU.
 *
 * The bytes of pre are consumed first, then those of src (both read with absolute gets, so
 * buffer positions are not moved). The input lengths are taken from the backing arrays
 * (pre.array().length / src.array().length).
 *
 * Only roundtrip mappings are accepted: the fallback test is currently disabled, so
 * isUseFallback has no effect. sharedData is not used.
 *
 * @param cx            extension indexes; null means "no extension data"
 * @param sisoState     <0 for a non-SI/SO converter, 0 for SBCS state, >0 for DBCS state
 * @param pre           bytes matched first (must not be null)
 * @param src           bytes matched after pre, or null
 * @param pMatchValue   receives, in element 0, the matched value with the roundtrip flag cleared
 * @param isUseFallback currently ignored (see above)
 * @param flush         true if this is the end of the input, so partial matches are final
 * @param sharedData    currently unused
 * @return >0: number of input bytes matched, pMatchValue[0] is set;
 *          0: no match;
 *         <0: negated number of bytes consumed so far for a partial match (only when !flush)
 */
private static int extMatchToU(ByteBuffer cx, byte sisoState, ByteBuffer pre, ByteBuffer src,
        int[] pMatchValue, boolean isUseFallback, boolean flush, UConverterSharedData sharedData) {
    IntBuffer toUTable, toUSection;
    int preLength = pre.array().length;
    int value, matchValue, srcLength;
    int i, j, index, length, matchLength;
    short b;

    if (src == null) {
        srcLength = 0;
    } else {
        srcLength = src.array().length;
    }

    /*
     * ByteBuffer.getInt(int) takes a BYTE offset, while EXT_TO_U_LENGTH is the number of a
     * 4-byte slot in the indexes array, hence the "* 4" (same as the EXT_FROM_U_STAGE_1_LENGTH
     * read in extMatchFromU).
     */
    if (cx == null || cx.getInt(EXT_TO_U_LENGTH * 4) <= 0) {
        return 0; /* no extension data, no match */
    }

    /* initialize */
    toUTable = (IntBuffer) ARRAY(cx, EXT_TO_U_INDEX, int.class);
    index = 0;

    matchValue = 0;
    i = j = matchLength = 0;

    if (sisoState == 0) {
        /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */
        if (preLength > 1) {
            return 0; /* no match of a DBCS sequence in SBCS mode */
        } else if (preLength == 1) {
            srcLength = 0;
        } else /* preLength==0 */ {
            if (srcLength > 1) {
                srcLength = 1;
            }
        }
        flush = true;
    }

    /* we must not remember fallback matches when not using fallbacks */

    /* match input units until there is a full match or the input is consumed */
    for (;;) {
        /* go to the next section (slice at index, then restore the table's position) */
        int oldpos = toUTable.position();
        toUSection = ((IntBuffer) toUTable.position(index)).slice();
        toUTable.position(oldpos);

        /* read first pair of the section */
        value = toUSection.get();
        length = TO_U_GET_BYTE(value);
        value = TO_U_GET_VALUE(value);
        if (value != 0 && (TO_U_IS_ROUNDTRIP(value)) /*|| isToUUseFallback(isUseFallback))*/
                && TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
            /* remember longest match so far */
            matchValue = value;
            matchLength = i + j;
        }

        /* match pre[] then src[] */
        if (i < preLength) {
            b = (short) (pre.get(i++) & UConverterConstants.UNSIGNED_BYTE_MASK);
        } else if (j < srcLength) {
            b = (short) (src.get(j++) & UConverterConstants.UNSIGNED_BYTE_MASK);
        } else {
            /* all input consumed, partial match */
            if (flush || (length = (i + j)) > MAX_BYTES) {
                /*
                 * end of the entire input stream, stop with the longest match so far or: partial match must not
                 * be longer than UCNV_EXT_MAX_BYTES because it must fit into state buffers
                 */
                break;
            } else {
                /* continue with more input next time */
                return -length;
            }
        }

        /* search for the current byte */
        value = findToU(toUSection, length, b);
        if (value == 0) {
            /* no match here, stop with the longest match so far */
            break;
        } else {
            if (TO_U_IS_PARTIAL(value)) {
                /* partial match, continue */
                index = TO_U_GET_PARTIAL_INDEX(value);
            } else {
                if (TO_U_IS_ROUNDTRIP(value) /*|| isToUUseFallback(isUseFallback)) */&& TO_U_VERIFY_SISO_MATCH(sisoState, i + j)) {
                    /* full match, stop with result */
                    matchValue = value;
                    matchLength = i + j;
                } else {
                    /* full match on fallback not taken, stop with the longest match so far */
                }
                break;
            }
        }
    }

    if (matchLength == 0) {
        /* no match at all */
        return 0;
    }

    /* return result */
    pMatchValue[0] = TO_U_MASK_ROUNDTRIP(matchValue);
    return matchLength;
}
/*
 * Simple fromUnicode conversion of one code point, for internal use by other
 * conversion implementations (the ISO 2022 converter).
 * It does not use the converter state nor call callbacks.
 * It does not handle the EBCDIC swaplfnl option (set in UConverter).
 * It handles conversion extensions but not GB 18030.
 *
 * The resulting bytes are packed into value[0] with the last byte in bits 7..0,
 * the second to last byte in bits 15..8, etc.
 * The function assumes but does not check that 0<=c<=0x10ffff.
 *
 * @param sharedData  converter data (mbcs.unicodeMask, mbcs.fromUnicodeTable,
 *                    mbcs.fromUnicodeBytes and mbcs.extIndexes are read)
 * @param c           the code point to convert
 * @param value       receives the packed bytes in element 0 when the result is non-zero
 * @param useFallback whether fallback mappings may be used
 * @param outputType  output type from MBCS, e.g. CharsetMBCS.MBCS_OUTPUT_2; any other value
 *                    is treated as 3-byte output
 * @return the number of bytes in value[0] for a roundtrip mapping,
 *         the negated number of bytes for a fallback mapping,
 *         0 if unassigned (value[0] untouched),
 *         or whatever the extension table lookup returns when the main table has no mapping
 */
static int MBCSFromUChar32_ISO2022(UConverterSharedData sharedData, int c, int[] value, boolean useFallback,
        int outputType) {
    /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
    boolean coveredByTrie = c < 0x10000
            || (sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) != 0;

    if (coveredByTrie) {
        char[] stage1 = sharedData.mbcs.fromUnicodeTable;
        int stage2Entry = MBCS_STAGE_2_FROM_U(stage1, c);
        int packed;
        int byteCount;

        /* fetch the result bytes and work out how many of them are significant */
        if (outputType == MBCS_OUTPUT_2) {
            packed = MBCS_VALUE_2_FROM_STAGE_2(sharedData.mbcs.fromUnicodeBytes, stage2Entry, c);
            byteCount = (packed <= 0xff) ? 1 : 2;
        } else { /* outputType == MBCS_OUTPUT_3 */
            byte[] resultBytes = sharedData.mbcs.fromUnicodeBytes;
            int at = MBCS_POINTER_3_FROM_STAGE_2(resultBytes, stage2Entry, c);
            int b0 = resultBytes[at] & UConverterConstants.UNSIGNED_BYTE_MASK;
            int b1 = resultBytes[at + 1] & UConverterConstants.UNSIGNED_BYTE_MASK;
            int b2 = resultBytes[at + 2] & UConverterConstants.UNSIGNED_BYTE_MASK;
            packed = (b0 << 16) | (b1 << 8) | b2;
            if (packed > 0xffff) {
                byteCount = 3;
            } else if (packed > 0xff) {
                byteCount = 2;
            } else {
                byteCount = 1;
            }
        }

        /* the "assigned" flag for c lives in the upper half of the stage 2 entry */
        boolean assigned = (stage2Entry & (1 << (16 + (c & 0xf)))) != 0;
        if (assigned) {
            value[0] = packed;
            return byteCount;
        }

        /*
         * Not assigned: a non-zero result is a fallback. A zero byte can only be a real
         * output when the "assigned" bit is set, so fallback output is never a zero byte.
         */
        if (CharsetEncoderICU.isFromUUseFallback(useFallback, c) && packed != 0) {
            value[0] = packed;
            return -byteCount;
        }
    }

    /* nothing usable in the main table: try the extension table, if there is one */
    ByteBuffer cx = sharedData.mbcs.extIndexes;
    if (cx == null) {
        return 0;
    }
    return extSimpleMatchFromU(cx, c, value, useFallback);
}
/*
 * Extension-table fromUnicode lookup for a single code point; used by the ISO 2022 implementation.
 *
 * Calls extMatchFromU with no additional input and flush set. A result is reported only
 * for a real match (match length >= 2) whose byte length is at most EXT_FROM_U_MAX_DIRECT_LENGTH.
 *
 * @param cx          extension indexes
 * @param c           the code point to convert
 * @param pValue      receives the result bytes in element 0 on success
 * @param useFallback whether fallback mappings may be used
 * @return number of bytes in pValue; negative number if fallback; 0 for no mapping, which covers
 *         - a match whose result is too long for simple conversion,
 *         - match==1: no match found, <subchar1> preferred,
 *         - match==0: no match found in the first place,
 *         - match<0: partial match, not supported for simple conversion (and flush==true)
 */
private static int extSimpleMatchFromU(ByteBuffer cx, int c, int[] pValue, boolean useFallback) {
    /* single-element array acts as the out-parameter for the matched value */
    int[] matched = new int[1];
    int matchLength = extMatchFromU(cx, c, null, null, matched, useFallback, true);
    if (matchLength < 2) {
        return 0;
    }

    int raw = matched[0];
    int byteCount = FROM_U_GET_LENGTH(raw);
    if (byteCount > EXT_FROM_U_MAX_DIRECT_LENGTH) {
        return 0;
    }

    pValue[0] = FROM_U_GET_DATA(raw);
    return FROM_U_IS_ROUNDTRIP(raw) ? byteCount : -byteCount;
}
/*
 * Matches a code point, optionally followed by more UTF-16 units, against the fromUnicode
 * extension table.
 *
 * firstCP is looked up through the stage 1/2/3 trie to a stage 3b value. If that value is a
 * complete mapping it is the result; if it is a "partial" value it is the index of a section in
 * the fromU UChars/values tables, and the units of pre and then src are matched section by
 * section, remembering the longest acceptable match.
 * A mapping is accepted only if it is a roundtrip or fallbacks are allowed for firstCP, and only
 * if none of its reserved bits (FROM_U_RESERVED_MASK) are set.
 *
 * @param cx          extension indexes; null means "no extension data"
 * @param firstCP     first code point of the input
 * @param pre         units matched after firstCP, may be null
 * @param src         units matched after pre, may be null
 * @param pMatchValue receives the raw matched value in element 0 when the return value is >=2
 * @param useFallback whether fallback mappings may be used
 * @param flush       true if this is the end of the input, so partial matches are final
 * @return >=2: match length, counted as 2 for firstCP plus the number of further units matched;
 *           1: the match is FROM_U_SUBCHAR1 (pMatchValue not set);
 *           0: no match;
 *          <0: -(2 + units consumed) for a partial match that needs more input (only when !flush)
 */
private static int extMatchFromU(ByteBuffer cx, int firstCP, char[] pre, char[] src, int[] pMatchValue, boolean useFallback, boolean flush) {
CharBuffer stage12, stage3;
IntBuffer stage3b;
CharBuffer fromUTableUChars, fromUSectionUChars;
IntBuffer fromUTableValues, fromUSectionValues;
int value, matchValue;
int i, j, index, length, matchLength;
char c;
if (cx == null) {
return 0; /* no extension data, no match */
}
/* trie lookup of firstCP */
index = firstCP>>10; /* stage 1 index */
/* getInt takes a byte offset, so the 4-byte slot number is multiplied by 4 */
if (index>=cx.getInt(EXT_FROM_U_STAGE_1_LENGTH*4)) { // need to find the correct int in the bytebuffer
return 0; /* the first code point is outside the trie */
}
stage12 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_12_INDEX, char.class);
stage3 = (CharBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3_INDEX, char.class);
index = FROM_U(stage12, stage3, index, firstCP);
stage3b = (IntBuffer)ARRAY(cx, EXT_FROM_U_STAGE_3B_INDEX, int.class);
value = stage3b.get(index);
if (value == 0) {
return 0;
}
/*
* Tests for (value&EXT_FROM_U_RESERVED_MASK) == 0:
* Do not interpret values with reserved bits used, for forward compatibility,
* and do not even remember intermediate results with reserved bits used.
*/
/*
* NOTE(review): this tests a fromUnicode value with TO_U_IS_PARTIAL (unsigned value below
* TO_U_MIN_CODE_POINT), whereas the loop below uses FROM_U_IS_PARTIAL (top 8 bits zero).
* The two tests are not equivalent -- confirm which one is intended here.
*/
if (TO_U_IS_PARTIAL(value)) {
/* partial match, enter the loop below */
index = FROM_U_GET_PARTIAL_INDEX(value);
/* initialize */
fromUTableUChars = (CharBuffer)ARRAY(cx, EXT_FROM_U_UCHARS_INDEX, char.class);
fromUTableValues = (IntBuffer)ARRAY(cx, EXT_FROM_U_VALUES_INDEX, int.class);
matchValue = 0;
i = j = matchLength = 0;
/* we must not remember fallback matches when not using fallbacks */
/*match inputs until there is a full match or the input is consumed */
for(;;) {
/* go to the next section */
/* slice both parallel tables at the same index, restoring each table's position afterwards */
int oldpos = fromUTableUChars.position();
fromUSectionUChars = ((CharBuffer)fromUTableUChars.position(index)).slice();
fromUTableUChars.position(oldpos);
oldpos = fromUTableValues.position();
fromUSectionValues = ((IntBuffer)fromUTableValues.position(index)).slice();
fromUTableValues.position(oldpos);
/*read first pair of the section */
/* the first UChar of a section is its length; the first value is the mapping for the input so far */
length = fromUSectionUChars.get();
value = fromUSectionValues.get();
if (value != 0 &&
(FROM_U_IS_ROUNDTRIP(value) || CharsetEncoderICU.isFromUUseFallback(useFallback, firstCP)) &&
(value&FROM_U_RESERVED_MASK) == 0) {
/* remember longest match so far */
matchValue = value;
matchLength = 2 + i + j;
}
/* match pre[] then src[] */
if (pre != null && i < pre.length) {
c = pre[i++];
} else if (src != null && j < src.length) {
c = src[j++];
} else {
/* all input consumed, partial match */
if (flush || (length=(i+j))> MAX_UCHARS) {
/*
* end of the entire input stream, stop with the longest match so far
* or: partial match must not be longer than MAX_UCHARS
* because it must fit into state buffers
*/
break;
} else {
/* continue with more input next time */
return -(2+length);
}
}
/* search for the current UChar */
index = findFromU(fromUSectionUChars, length, c);
if (index < 0) {
/* no match here, stop with the longest match so far */
break;
} else {
value = fromUSectionValues.get(index);
if (FROM_U_IS_PARTIAL(value)) {
/* partial match, continue */
index = FROM_U_GET_PARTIAL_INDEX(value);
} else {
if ((FROM_U_IS_ROUNDTRIP(value) || CharsetEncoderICU.isFromUUseFallback(useFallback, firstCP)) &&
(value&FROM_U_RESERVED_MASK) == 0 ) {
/* full match, stop with result */
matchValue = value;
matchLength = 2 + i + j;
} else {
/* full match on fallback not taken, stop with the longest match so far */
}
break;
}
}
}
if (matchLength == 0) {
/* no match at all */
return 0;
}
} else { /* result from firstCP trie lookup */
if ((FROM_U_IS_ROUNDTRIP(value) || CharsetEncoderICU.isFromUUseFallback(useFallback, firstCP)) &&
(value&FROM_U_RESERVED_MASK) == 0) {
/* full match, stop with result */
matchValue = value;
matchLength = 2;
} else {
/* fallback not taken */
return 0;
}
}
/* return result */
if (matchValue == FROM_U_SUBCHAR1) {
return 1; /* assert matchLength == 2 */
}
pMatchValue[0] = matchValue;
return matchLength;
}
/*
 * Converts one code point to a single byte using the single-byte fromUnicode table.
 *
 * The 16-bit table result carries the output byte in its low 8 bits and classification
 * bits above them; the byte is stored in retval[0] whenever the table is consulted,
 * regardless of the classification.
 *
 * @param sharedData  converter data (mbcs.unicodeMask, mbcs.fromUnicodeTable and
 *                    mbcs.fromUnicodeBytes are read)
 * @param c           the code point to convert
 * @param retval      receives the output byte in element 0
 * @param useFallback lowers the threshold for the -1 result from 0xc00 to 0x800
 * @return 1 roundtrip byte (result >= 0xf00),
 *         -1 fallback byte (result at or above the threshold),
 *         0 no mapping
 */
static int MBCSSingleFromUChar32(UConverterSharedData sharedData, int c, int[] retval, boolean useFallback) {
    /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
    boolean supplementary = c >= 0x10000;
    if (supplementary && (sharedData.mbcs.unicodeMask & UConverterConstants.HAS_SUPPLEMENTARY) == 0) {
        return 0;
    }

    /* look up the 16-bit result word for c */
    int result = MBCS_SINGLE_RESULT_FROM_U(sharedData.mbcs.fromUnicodeTable,
            sharedData.mbcs.fromUnicodeBytes, c);

    /* the low 8 bits are the output byte */
    retval[0] = result & 0xff;

    if (result >= 0xf00) {
        return 1; /* roundtrip */
    }
    int threshold = useFallback ? 0x800 : 0xc00;
    if (result >= threshold) {
        return -1; /* fallback taken */
    }
    return 0; /* no mapping */
}
class CharsetDecoderMBCS extends CharsetDecoderICU {
CharsetDecoderMBCS(CharsetICU cs) {