From b6df9c649e1c32ee71cb7bafaa89384fe07940ac Mon Sep 17 00:00:00 2001 From: Michael Ow Date: Wed, 27 Feb 2008 17:48:58 +0000 Subject: [PATCH] ICU-6144 Add new methods to specify callback on malform and unmapple errors. X-SVN-Rev: 23490 --- .../ibm/icu/charset/CharsetDecoderICU.java | 21 + .../ibm/icu/charset/CharsetEncoderICU.java | 634 ++++++++++-------- 2 files changed, 388 insertions(+), 267 deletions(-) diff --git a/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java b/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java index 9bedab2ee9..857b09ce69 100644 --- a/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java +++ b/icu4j/src/com/ibm/icu/charset/CharsetDecoderICU.java @@ -129,6 +129,16 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ onMalformedInput = getCallback(newAction); } + /** + * Sets the callback decoder method to be used if an illegal sequence is encountered. + * + * @param newCallback CharsetCallback.Decoder + * @exception IllegalArgumentException + * @draft ICU 4.0 + */ + public final void onMalformedInput(CharsetCallback.Decoder newCallback) { + onMalformedInput = newCallback; + } /** * Sets the action to be taken if an illegal sequence is encountered * @@ -150,6 +160,17 @@ public abstract class CharsetDecoderICU extends CharsetDecoder{ onUnmappableCharacter = getCallback(newAction); } + + /** + * Sets the callback decoder method to be used if an illegal sequence is encountered. 
+ * + * @param newCallback CharsetCallback.Decoder + * @exception IllegalArgumentException + * @draft ICU 4.0 + */ + public final void onUnmappableCharacter(CharsetCallback.Decoder newCallback) { + onUnmappableCharacter = newCallback; + } private static CharsetCallback.Decoder getCallback(CodingErrorAction action){ if(action==CodingErrorAction.REPLACE){ return CharsetCallback.TO_U_CALLBACK_SUBSTITUTE; diff --git a/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java b/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java index 689f40a7d4..f19fff444b 100644 --- a/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java +++ b/icu4j/src/com/ibm/icu/charset/CharsetEncoderICU.java @@ -1,11 +1,11 @@ /** -******************************************************************************* -* Copyright (C) 2006-2008, International Business Machines Corporation and * -* others. All Rights Reserved. * -******************************************************************************* -* -******************************************************************************* -*/ + ******************************************************************************* + * Copyright (C) 2006-2008, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + * + ******************************************************************************* + */ package com.ibm.icu.charset; @@ -29,51 +29,63 @@ import com.ibm.icu.text.UTF16; * @provisional This API might change or be removed in a future release. 
*/ public abstract class CharsetEncoderICU extends CharsetEncoder { - + /* this is used in fromUnicode DBCS tables as an "unassigned" marker */ static final char MISSING_CHAR_MARKER = '\uFFFF'; byte[] errorBuffer = new byte[30]; + int errorBufferLength = 0; - + /** these are for encodeLoopICU */ int fromUnicodeStatus; + int fromUChar32; + boolean useSubChar1; + boolean useFallback; - + /* maximum number of indexed UChars */ static final int EXT_MAX_UCHARS = 19; - + /* store previous UChars/chars to continue partial matches */ int preFromUFirstCP; /* >=0: partial match */ + char[] preFromUArray = new char[EXT_MAX_UCHARS]; + int preFromUBegin; - int preFromULength; /* negative: replay */ - - char[] invalidUCharBuffer = new char[2]; - int invalidUCharLength; + + int preFromULength; /* negative: replay */ + + char[] invalidUCharBuffer = new char[2]; + + int invalidUCharLength; + Object fromUContext; + private CharsetCallback.Encoder onUnmappableInput = CharsetCallback.FROM_U_CALLBACK_STOP; + private CharsetCallback.Encoder onMalformedInput = CharsetCallback.FROM_U_CALLBACK_STOP; + CharsetCallback.Encoder fromCharErrorBehaviour = new CharsetCallback.Encoder() { - public CoderResult call(CharsetEncoderICU encoder, Object context, CharBuffer source, - ByteBuffer target, IntBuffer offsets, char[] buffer, int length, int cp, - CoderResult cr) { + public CoderResult call(CharsetEncoderICU encoder, Object context, + CharBuffer source, ByteBuffer target, IntBuffer offsets, + char[] buffer, int length, int cp, CoderResult cr) { if (cr.isUnmappable()) { - return onUnmappableInput.call(encoder, context, source, target, offsets, buffer, - length, cp, cr); + return onUnmappableInput.call(encoder, context, source, target, + offsets, buffer, length, cp, cr); } else if (cr.isMalformed()) { - return onMalformedInput.call(encoder, context, source, target, offsets, buffer, - length, cp, cr); + return onMalformedInput.call(encoder, context, source, target, + offsets, buffer, length, cp, 
cr); } - return CharsetCallback.FROM_U_CALLBACK_STOP.call(encoder, context, source, target, - offsets, buffer, length, cp, cr); + return CharsetCallback.FROM_U_CALLBACK_STOP.call(encoder, context, + source, target, offsets, buffer, length, cp, cr); } }; - /** + /** * Construcs a new encoder for the given charset * * @param cs @@ -84,7 +96,8 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * @provisional This API might change or be removed in a future release. */ CharsetEncoderICU(CharsetICU cs, byte[] replacement) { - super(cs, (cs.minBytesPerChar+cs.maxBytesPerChar)/2, cs.maxBytesPerChar, replacement); + super(cs, (cs.minBytesPerChar + cs.maxBytesPerChar) / 2, + cs.maxBytesPerChar, replacement); } /** @@ -99,7 +112,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { public boolean isFallbackUsed() { return useFallback; } - + /** * Sets whether this Encoder can use fallbacks? * @param usesFallback true if the user wants the converter to take @@ -118,16 +131,18 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * @provisional This API might change or be removed in a future release. 
*/ final boolean isFromUUseFallback(int c) { - return (useFallback) || (UCharacter.getType(c) == UCharacter.PRIVATE_USE); + return (useFallback) + || (UCharacter.getType(c) == UCharacter.PRIVATE_USE); } - + /** * Use fallbacks from Unicode to codepage when useFallback or for private-use code points */ static final boolean isFromUUseFallback(boolean iUseFallback, int c) { - return (iUseFallback) || (UCharacter.getType(c) == UCharacter.PRIVATE_USE); + return (iUseFallback) + || (UCharacter.getType(c) == UCharacter.PRIVATE_USE); } - + /** * Sets the action to be taken if an illegal sequence is encountered * @@ -136,11 +151,22 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * @exception IllegalArgumentException * @stable ICU 3.6 */ - protected void implOnMalformedInput(CodingErrorAction newAction) { - onMalformedInput = getCallback(newAction); - } + protected void implOnMalformedInput(CodingErrorAction newAction) { + onMalformedInput = getCallback(newAction); + } - /** + /** + * Sets the callback encoder method to be used if an illegal sequence is encountered. + * + * @param newCallback CharsetCallback.Encoder + * @exception IllegalArgumentException + * @draft ICU 4.0 + */ + public final void onMalformedInput(CharsetCallback.Encoder newCallback) { + onMalformedInput = newCallback; + } + + /** * Sets the action to be taken if an illegal sequence is encountered * * @param newAction @@ -150,71 +176,83 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { */ protected void implOnUnmappableCharacter(CodingErrorAction newAction) { onUnmappableInput = getCallback(newAction); - } - - private static CharsetCallback.Encoder getCallback(CodingErrorAction action){ - if(action==CodingErrorAction.REPLACE){ + } + + /** + * Sets the callback encoder method to be used if an illegal sequence is encountered. 
+ * + * @param newCallback CharsetCallback.Encoder + * @exception IllegalArgumentException + * @draft ICU 4.0 + */ + public final void onUnmappableInput(CharsetCallback.Encoder newCallback) { + onUnmappableInput = newCallback; + } + + private static CharsetCallback.Encoder getCallback(CodingErrorAction action) { + if (action == CodingErrorAction.REPLACE) { return CharsetCallback.FROM_U_CALLBACK_SUBSTITUTE; - }else if(action==CodingErrorAction.IGNORE){ + } else if (action == CodingErrorAction.IGNORE) { return CharsetCallback.FROM_U_CALLBACK_SKIP; - }else if(action==CodingErrorAction.REPORT){ + } else if (action == CodingErrorAction.REPORT) { return CharsetCallback.FROM_U_CALLBACK_STOP; } return CharsetCallback.FROM_U_CALLBACK_STOP; } private static final CharBuffer EMPTY = CharBuffer.allocate(0); - /** - * Flushes any characters saved in the converter's internal buffer and - * resets the converter. - * @param out action to be taken - * @return result of flushing action and completes the decoding all input. - * Returns CoderResult.UNDERFLOW if the action succeeds. + + /** + * Flushes any characters saved in the converter's internal buffer and + * resets the converter. + * @param out action to be taken + * @return result of flushing action and completes the decoding all input. + * Returns CoderResult.UNDERFLOW if the action succeeds. * @stable ICU 3.6 - */ + */ protected CoderResult implFlush(ByteBuffer out) { return encode(EMPTY, out, null, true); - } + } - /** - * Resets the from Unicode mode of converter + /** + * Resets the from Unicode mode of converter * @stable ICU 3.6 - */ + */ protected void implReset() { - errorBufferLength=0; + errorBufferLength = 0; fromUnicodeStatus = 0; fromUChar32 = 0; fromUnicodeReset(); - } - + } + private void fromUnicodeReset() { preFromUBegin = 0; preFromUFirstCP = UConverterConstants.U_SENTINEL; preFromULength = 0; } - /** - * Encodes one or more chars. 
The default behaviour of the - * converter is stop and report if an error in input stream is encountered. - * To set different behaviour use @see CharsetEncoder.onMalformedInput() - * @param in buffer to decode - * @param out buffer to populate with decoded result - * @return result of decoding action. Returns CoderResult.UNDERFLOW if the decoding - * action succeeds or more input is needed for completing the decoding action. + /** + * Encodes one or more chars. The default behaviour of the + * converter is stop and report if an error in input stream is encountered. + * To set different behaviour use @see CharsetEncoder.onMalformedInput() + * @param in buffer to decode + * @param out buffer to populate with decoded result + * @return result of decoding action. Returns CoderResult.UNDERFLOW if the decoding + * action succeeds or more input is needed for completing the decoding action. * @stable ICU 3.6 - */ + */ protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { - if(!in.hasRemaining()){ + if (!in.hasRemaining()) { // The Java framework should have already substituted what was left. fromUChar32 = 0; //fromUnicodeReset(); return CoderResult.UNDERFLOW; } - in.position(in.position()+fromUCountPending()); + in.position(in.position() + fromUCountPending()); /* do the conversion */ CoderResult ret = encode(in, out, null, false); setSourcePosition(in); - if(ret.isUnderflow() && in.hasRemaining()){ + if (ret.isUnderflow() && in.hasRemaining()) { // The Java framework is going to substitute what is left. fromUnicodeReset(); } @@ -230,8 +268,9 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * @draft ICU 3.6 * @provisional This API might change or be removed in a future release. 
*/ - abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush); - + abstract CoderResult encodeLoop(CharBuffer source, ByteBuffer target, + IntBuffer offsets, boolean flush); + /** * Implements ICU semantics for encoding the buffer * @param source The input character buffer @@ -243,11 +282,11 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * @draft ICU 3.6 * @provisional This API might change or be removed in a future release. */ - final CoderResult encode(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){ + final CoderResult encode(CharBuffer source, ByteBuffer target, + IntBuffer offsets, boolean flush) { - - /* check parameters */ - if(target==null || source==null) { + /* check parameters */ + if (target == null || source == null) { throw new IllegalArgumentException(); } @@ -263,44 +302,44 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * consumed or the target filled (unless an error occurs). * An adjustment would be targetLimit=t+0x7fffffff; for example. 
*/ - + /* flush the target overflow buffer */ - if(errorBufferLength>0) { + if (errorBufferLength > 0) { byte[] overflowArray; int i, length; - - overflowArray=errorBuffer; - length=errorBufferLength; - i=0; + + overflowArray = errorBuffer; + length = errorBufferLength; + i = 0; do { - if(target.remaining()==0) { + if (target.remaining() == 0) { /* the overflow buffer contains too much, keep the rest */ - int j=0; - + int j = 0; + do { - overflowArray[j++]=overflowArray[i++]; - } while(i=0) { + + if (!flush && source.remaining() == 0 && preFromULength >= 0) { /* the overflow buffer is emptied and there is no new input: we are done */ return CoderResult.UNDERFLOW; } - + /* * Do not simply return with a buffer overflow error if * !flush && t==targetLimit @@ -308,7 +347,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * For example, the skip callback may be called; * it does not output anything. */ - + return fromUnicodeWithCallback(source, target, offsets, flush); } @@ -336,45 +375,45 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * @draft ICU 3.6 * @provisional This API might change or be removed in a future release. 
*/ - final CoderResult fromUnicodeWithCallback(CharBuffer source, ByteBuffer target, IntBuffer offsets, boolean flush){ + final CoderResult fromUnicodeWithCallback(CharBuffer source, + ByteBuffer target, IntBuffer offsets, boolean flush) { int sBufferIndex; int sourceIndex; int errorInputLength; boolean converterSawEndOfInput, calledCallback; - /* variables for m:n conversion */ CharBuffer replayArray = CharBuffer.allocate(EXT_MAX_UCHARS); - int replayArrayIndex=0; + int replayArrayIndex = 0; CharBuffer realSource; boolean realFlush; - - CoderResult cr = CoderResult.UNDERFLOW; - - /* get the converter implementation function */ - sourceIndex=0; - if(preFromULength>=0) { + CoderResult cr = CoderResult.UNDERFLOW; + + /* get the converter implementation function */ + sourceIndex = 0; + + if (preFromULength >= 0) { /* normal mode */ - realSource=null; - realFlush=false; + realSource = null; + realFlush = false; } else { /* * Previous m:n conversion stored source units from a partial match * and failed to consume all of them. * We need to "replay" them from a temporary buffer and convert them first. 
*/ - realSource=source; + realSource = source; realFlush = flush; - + //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR); - replayArray.put(preFromUArray,0, -preFromULength); + replayArray.put(preFromUArray, 0, -preFromULength); source.position(replayArrayIndex); - source.limit(replayArrayIndex-preFromULength); //preFromULength is negative, see declaration - source=replayArray; - flush=false; - - preFromULength=0; + source.limit(replayArrayIndex - preFromULength); //preFromULength is negative, see declaration + source = replayArray; + flush = false; + + preFromULength = 0; } /* @@ -389,7 +428,7 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * } * } */ - for(;;) { + for (;;) { /* convert */ cr = encodeLoop(source, target, offsets, flush); /* @@ -399,13 +438,14 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * need not check cnv.preFromULength==0 because a replay (<0) will cause * s0) { - + if (length > 0) { + /* * if a converter handles offsets and updates the offsets * pointer at the end, then offset should not change @@ -428,71 +468,71 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * however, some converters do not handle offsets at all * (sourceIndex<0) or may not update the offsets pointer */ - offsets.position(offsets.position()+length); + offsets.position(offsets.position() + length); } - - if(sourceIndex>=0) { - sourceIndex+=(int)(source.position()); + + if (sourceIndex >= 0) { + sourceIndex += (int) (source.position()); } } - if(preFromULength<0) { + if (preFromULength < 0) { /* * switch the source to new replay units (cannot occur while replaying) * after offset handling and before end-of-input and callback handling */ - if(realSource==null) { - realSource=source; - realFlush=flush; - + if (realSource == null) { + realSource = source; + realFlush = flush; + //UConverterUtility.uprv_memcpy(replayArray, replayArrayIndex, 
preFromUArray, 0, -preFromULength*UMachine.U_SIZEOF_UCHAR); - replayArray.put(preFromUArray,0, -preFromULength); - - source=replayArray; + replayArray.put(preFromUArray, 0, -preFromULength); + + source = replayArray; source.position(replayArrayIndex); - source.limit(replayArrayIndex-preFromULength); - flush=false; - if((sourceIndex+=preFromULength)<0) { - sourceIndex=-1; + source.limit(replayArrayIndex - preFromULength); + flush = false; + if ((sourceIndex += preFromULength) < 0) { + sourceIndex = -1; } - - preFromULength=0; + + preFromULength = 0; } else { /* see implementation note before _fromUnicodeWithCallback() */ //agljport:todo U_ASSERT(realSource==NULL); - Assert.assrt(realSource==null); + Assert.assrt(realSource == null); } } /* update pointers */ - sBufferIndex=source.position(); - if(cr.isUnderflow()) { - if(sBufferIndex0) { + + length = source.remaining(); + if (length > 0) { //UConverterUtility.uprv_memcpy(preFromUArray, 0, sourceArray, pArgs.sourceBegin, length*UMachine.U_SIZEOF_UCHAR); - source.get(preFromUArray, 0, length ); - preFromULength=(byte)-length; + source.get(preFromUArray, 0, length); + preFromULength = (byte) -length; } - source=realSource; - flush=realFlush; + source = realSource; + flush = realFlush; } return cr; } @@ -552,16 +591,19 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { /* callback handling */ { /* get and write the code point */ - errorInputLength = UTF16.append(invalidUCharBuffer, 0, fromUChar32); + errorInputLength = UTF16.append(invalidUCharBuffer, 0, + fromUChar32); invalidUCharLength = errorInputLength; - + /* set the converter state to deal with the next character */ - fromUChar32=0; - + fromUChar32 = 0; + /* call the callback function */ - cr = fromCharErrorBehaviour.call(this, fromUContext, source, target, offsets, invalidUCharBuffer, invalidUCharLength, fromUChar32, cr); + cr = fromCharErrorBehaviour.call(this, fromUContext, + source, target, offsets, invalidUCharBuffer, + invalidUCharLength, 
fromUChar32, cr); } - + /* * loop back to the offset handling * @@ -569,65 +611,66 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * that a callback was called; * if the callback did not resolve the error, then we return */ - calledCallback=true; + calledCallback = true; } } } - /* - * Ascertains if a given Unicode code point (32bit value for handling surrogates) - * can be converted to the target encoding. If the caller wants to test if a - * surrogate pair can be converted to target encoding then the - * responsibility of assembling the int value lies with the caller. - * For assembling a code point the caller can use UTF16 class of ICU4J and do something like: - *
-	 * while(i
-	 * or
-	 * 
-	 * String src = new String(mySource);
-	 * int i,codepoint;
-	 * boolean passed = false;
-	 * while(i<src.length()){
-	 *	codepoint = UTF16.charAt(src,i);
-	 *	i+= (codepoint>0xfff)? 2:1;
-	 *	if(!(CharsetEncoderICU) myConv).canEncode(codepoint)){
-	 *	    passed = false;
-	 *	}
-	 * }
-	 * 
- * - * @param codepoint Unicode code point as int value - * @return true if a character can be converted + + /* + * Ascertains if a given Unicode code point (32bit value for handling surrogates) + * can be converted to the target encoding. If the caller wants to test if a + * surrogate pair can be converted to target encoding then the + * responsibility of assembling the int value lies with the caller. + * For assembling a code point the caller can use UTF16 class of ICU4J and do something like: + *
+     * while(i
+     * or
+     * 
+     * String src = new String(mySource);
+     * int i,codepoint;
+     * boolean passed = false;
+     * while(i<src.length()){
+     *	codepoint = UTF16.charAt(src,i);
+     *	i+= (codepoint>0xfff)? 2:1;
+     *	if(!(CharsetEncoderICU) myConv).canEncode(codepoint)){
+     *	    passed = false;
+     *	}
+     * }
+     * 
+ * + * @param codepoint Unicode code point as int value + * @return true if a character can be converted * @draft ICU 3.6 - * @provisional This API might change or be removed in a future release. - */ + * @provisional This API might change or be removed in a future release. + */ /* TODO This is different from Java's canEncode(char) API. * ICU's API should implement getUnicodeSet, * and override canEncode(char) which queries getUnicodeSet. * The getUnicodeSet should return a frozen UnicodeSet or use a fillin parameter, like ICU4C. */ - /*public boolean canEncode(int codepoint) { - return true; + /*public boolean canEncode(int codepoint) { + return true; }*/ - /** + /** * Overrides super class method * @stable ICU 3.6 - */ - public boolean isLegalReplacement(byte[] repl){ - return true; + */ + public boolean isLegalReplacement(byte[] repl) { + return true; } - + /** * Writes out the specified output bytes to the target byte buffer or to converter internal buffers. * @param cnv @@ -641,43 +684,42 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * @draft ICU 3.6 * @provisional This API might change or be removed in a future release. 
*/ - static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv, - byte[] bytesArray, int bytesBegin, int bytesLength, - ByteBuffer out, IntBuffer offsets, int sourceIndex){ + static final CoderResult fromUWriteBytes(CharsetEncoderICU cnv, + byte[] bytesArray, int bytesBegin, int bytesLength, ByteBuffer out, + IntBuffer offsets, int sourceIndex) { //write bytes int obl = bytesLength; CoderResult cr = CoderResult.UNDERFLOW; int bytesLimit = bytesBegin + bytesLength; - try{ - for (;bytesBegin< bytesLimit;){ + try { + for (; bytesBegin < bytesLimit;) { out.put(bytesArray[bytesBegin]); bytesBegin++; } // success - bytesLength=0; - }catch( BufferOverflowException ex){ + bytesLength = 0; + } catch (BufferOverflowException ex) { cr = CoderResult.OVERFLOW; } - - - if(offsets!=null) { - while(obl>bytesLength) { + + if (offsets != null) { + while (obl > bytesLength) { offsets.put(sourceIndex); --obl; } } //write overflow cnv.errorBufferLength = bytesLimit - bytesBegin; - if(cnv.errorBufferLength >0) { - int index = 0; - while(bytesBegin 0) { + int index = 0; + while (bytesBegin < bytesLimit) { + cnv.errorBuffer[index++] = bytesArray[bytesBegin++]; + } cr = CoderResult.OVERFLOW; } - return cr; - } + return cr; + } /** * Returns the number of chars held in the converter's internal state @@ -688,28 +730,30 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * @draft ICU 3.4 * @provisional This API might change or be removed in a future release. 
*/ - /*public*/ int fromUCountPending(){ - if(preFromULength > 0){ - return UTF16.getCharCount(preFromUFirstCP)+preFromULength ; - }else if(preFromULength < 0){ - return -preFromULength ; - }else if(fromUChar32 > 0){ + /*public*/int fromUCountPending() { + if (preFromULength > 0) { + return UTF16.getCharCount(preFromUFirstCP) + preFromULength; + } else if (preFromULength < 0) { + return -preFromULength; + } else if (fromUChar32 > 0) { return 1; - }else if(preFromUFirstCP >0){ + } else if (preFromUFirstCP > 0) { return UTF16.getCharCount(preFromUFirstCP); } - return 0; - } + return 0; + } + /** * * @param source */ - private final void setSourcePosition(CharBuffer source){ - + private final void setSourcePosition(CharBuffer source) { + // ok was there input held in the previous invocation of decodeLoop // that resulted in output in this invocation? source.position(source.position() - fromUCountPending()); } + /** * Write the codepage substitution character. * Subclasses to override this method. @@ -722,9 +766,8 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * @draft ICU 3.6 * @provisional This API might change or be removed in a future release. */ - CoderResult cbFromUWriteSub (CharsetEncoderICU encoder, - CharBuffer source, ByteBuffer target, - IntBuffer offsets){ + CoderResult cbFromUWriteSub(CharsetEncoderICU encoder, CharBuffer source, + ByteBuffer target, IntBuffer offsets) { CharsetICU cs = (CharsetICU) encoder.charset(); byte[] sub = encoder.replacement(); if (cs.subChar1 != 0 && encoder.invalidUCharBuffer[0] <= 0xff) { @@ -736,8 +779,64 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { sub.length, target, offsets, source.position()); } } - - + + /** + * Write the characters to target. + * @param source The input character buffer + * @param target The output byte buffer + * @param offsets + * @return A CoderResult object that contains the error result when an error occurs. 
+ * @draft ICU 4.0 + * @provisional This API might change or be removed in a future release. + * */ + CoderResult cbFromUWriteUChars(CharsetEncoderICU encoder, + CharBuffer source, ByteBuffer target, IntBuffer offsets) { + CoderResult cr = CoderResult.UNDERFLOW; + + /* This is a fun one. Recursion can occur - we're basically going to + * just retry shoving data through the same converter. Note, if you got + * here through some kind of invalid sequence, you maybe should emit a + * reset sequence of some kind. Since this IS an actual conversion, + * take care that you've changed the callback or the data, or you'll + * get an infinite loop. + */ + + int oldTargetPosition = target.position(); + int offsetIndex = source.position(); + + cr = encoder.encode(source, target, null, false); /* no offsets and no flush */ + + if (offsets != null) { + while (target.position() != oldTargetPosition) { + offsets.put(offsetIndex); + oldTargetPosition++; + } + } + + /* Note, if you did something like used a stop subcallback, things would get interesting. + * In fact, here's where we want to return the partially consumed in-source! + */ + if (cr.isOverflow()) { + /* Overflowed target. Now, we'll write into the charErrorBuffer. + * It's a fixed size. If we overflow it...Hm + */ + cr = CoderResult.UNDERFLOW; + + /* start the new target at the first free slot in the error buffer */ + int errBuffLen = encoder.errorBufferLength; + ByteBuffer newTarget = ByteBuffer.wrap(encoder.errorBuffer); + newTarget.position(errBuffLen); /* set the position at the end of the error buffer */ + encoder.errorBufferLength = 0; + + cr = encoder.encode(source, target, null, false); + + encoder.errorBuffer = newTarget.array(); + encoder.errorBufferLength = newTarget.position(); + } + + return cr; + } + /** *

* Handles a common situation where a character has been read and it may be @@ -767,24 +866,24 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { fromUChar32 = lead; return CoderResult.malformedForLength(1); } - + if (!source.hasRemaining()) { fromUChar32 = lead; return CoderResult.UNDERFLOW; } - + char trail = source.get(); - + if (!UTF16.isTrailSurrogate(trail)) { fromUChar32 = lead; source.position(source.position() - 1); return CoderResult.malformedForLength(1); } - + fromUChar32 = UCharacter.getCodePoint(lead, trail); return null; } - + /** *

* Same as handleSurrogates(CharBuffer, char), but with arrays. As an added @@ -804,24 +903,25 @@ public abstract class CharsetEncoderICU extends CharsetEncoder { * @see handleSurrogates(CharBuffer, int, char) * @see handleSurrogates(char[], int, int, char) */ - final CoderResult handleSurrogates(char[] sourceArray, int sourceIndex, int sourceLimit, char lead) { + final CoderResult handleSurrogates(char[] sourceArray, int sourceIndex, + int sourceLimit, char lead) { if (!UTF16.isLeadSurrogate(lead)) { fromUChar32 = lead; return CoderResult.malformedForLength(1); } - + if (sourceIndex >= sourceLimit) { fromUChar32 = lead; return CoderResult.UNDERFLOW; } - + char trail = sourceArray[sourceIndex]; - + if (!UTF16.isTrailSurrogate(trail)) { fromUChar32 = lead; return CoderResult.malformedForLength(1); } - + fromUChar32 = UCharacter.getCodePoint(lead, trail); return null; }