/**
*******************************************************************************
* Copyright (C) 1996-2001, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
* $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/text/utility/UTF8StreamWriter.java,v $
* $Date: 2002/03/15 00:34:46 $
* $Revision: 1.5 $
*
*******************************************************************************
*/
package com.ibm.text.utility;
import java.io.*;
/**
 * Utility class that writes UTF8.
 * Main purpose is to supplant OutputStreamWriter(x, "UTF8"), since that has serious errors.
 * <p>
 * Example of Usage:
 * <pre>
 * PrintWriter log = new PrintWriter(
 *     new UTF8StreamWriter(new FileOutputStream(fileName), 32*1024));
 * </pre>
 * Behavior notes:
 * <ul>
 * <li>Surrogate pairs are combined into one supplementary code point and written as 4 bytes.
 *     A high surrogate seen at the end of one {@code write} call is remembered and paired
 *     with the first char of the next call.</li>
 * <li>An unpaired high surrogate (including one still pending at {@code flush}/{@code close})
 *     and an unpaired low surrogate are written as-is in the 3-byte form.</li>
 * <li>When {@code removeCR} is set, every U+000D is dropped from the output.</li>
 * <li>When {@code latin1} is set, each char is written as a single byte; chars above U+00FF
 *     are replaced by '?'.</li>
 * </ul>
 * NB: unsynchronized for simplicity and speed. The same object must NOT be used in multiple threads.
 */
// TODO: a flush() issued between the two halves of a surrogate pair still emits the
// high surrogate unpaired.
public final class UTF8StreamWriter extends Writer {

    /** Code points below this value fit in one byte. */
    private static final int NEED_2_BYTES = 1 << 7;
    /** Code points below this value (and >= NEED_2_BYTES) fit in two bytes. */
    private static final int NEED_3_BYTES = 1 << (2 * 5 + 1);
    /** Code points below this value (and >= NEED_3_BYTES) fit in three bytes. */
    private static final int NEED_4_BYTES = 1 << (3 * 5 + 1);

    /** Mask selecting the six payload bits carried by a trailing byte. */
    private static final int TRAILING_BOTTOM_MASK = 0x3F;
    /** Marker bits of a trailing byte. */
    private static final int TRAILING_TOP = 0x80;

    /**
     * Constant such that {@code (high << 10) + low + MAGIC} is the code point
     * encoded by the surrogate pair {@code high, low}.
     */
    private static final int MAGIC = 0x10000 + ((0 - 0xD800) << 10) + (0 - 0xDC00);

    /** Largest number of bytes a single writeCodePoint call can produce. */
    private static final int MAX_BYTES_PER_CODE_POINT = 4;

    private final OutputStream output;
    private final byte[] bBuffer; // do a bit of buffering ourselves for efficiency
    /** Highest value of bIndex at which a 4-byte sequence still fits in bBuffer. */
    private final int bSafeEnd;
    /** Number of bytes currently held in bBuffer. */
    private int bIndex = 0;
    /** Pending high surrogate waiting for its low half, or 0 if none. */
    private int highSurrogate = 0;
    private final boolean removeCR;
    private final boolean latin1;

    /**
     * Creates a UTF-8 writer that removes carriage returns.
     *
     * @param stream destination of the encoded bytes
     * @param buffersize size of the internal byte buffer, at least 5
     * @throws IllegalArgumentException if {@code buffersize < 5}
     */
    public UTF8StreamWriter(OutputStream stream, int buffersize) {
        this(stream, buffersize, true, false);
    }

    /**
     * Creates a writer.
     *
     * @param stream destination of the encoded bytes
     * @param buffersize size of the internal byte buffer, at least 5
     * @param removeCR if true, U+000D characters are dropped
     * @param latin1 if true, write one byte per char (chars above U+00FF become '?')
     *        instead of UTF-8
     * @throws IllegalArgumentException if {@code buffersize < 5}
     */
    public UTF8StreamWriter(OutputStream stream, int buffersize, boolean removeCR, boolean latin1) {
        if (buffersize < 5) {
            throw new IllegalArgumentException("UTF8StreamWriter buffersize must be >= 5");
        }
        output = stream;
        bBuffer = new byte[buffersize];
        bSafeEnd = buffersize - MAX_BYTES_PER_CODE_POINT;
        this.removeCR = removeCR;
        this.latin1 = latin1;
    }

    /**
     * Encodes {@code cLength} chars of {@code buffer}, starting at {@code cStart},
     * into the internal buffer, passing full buffers on to the underlying stream.
     *
     * @param buffer source characters
     * @param cStart index of the first char to write
     * @param cLength number of chars to write
     * @throws IOException if the underlying stream fails
     */
    public final void write(char[] buffer, int cStart, int cLength) throws IOException {
        int cEnd = cStart + cLength;
        while (cStart < cEnd) {
            // make sure the next code point (up to 4 bytes) fits
            makeRoom();

            // get code unit
            int utf32 = buffer[cStart++];
            if (utf32 == 0x0D && removeCR) {
                continue; // skip write
            }

            if (latin1) {
                bBuffer[bIndex++] = (utf32 > 0xFF) ? (byte) '?' : (byte) utf32;
                continue;
            }

            // special check for surrogates
            if (highSurrogate != 0) {
                if (utf32 >= 0xDC00 && utf32 <= 0xDFFF) {
                    writeCodePoint((highSurrogate << 10) + utf32 + MAGIC);
                    highSurrogate = 0;
                    continue;
                }
                // Unpaired high surrogate: emit it alone. This consumes up to 3 bytes
                // of the space reserved above, so make room again before the current
                // char is written further down; otherwise the buffer can overrun.
                writeCodePoint(highSurrogate);
                highSurrogate = 0;
                makeRoom();
            }

            if (0xD800 <= utf32 && utf32 <= 0xDBFF) {
                highSurrogate = utf32; // wait for the low half
                continue;
            }

            // normal case
            writeCodePoint(utf32);
        }
    }

    /**
     * Appends the UTF-8 form of {@code utf32} to the internal buffer.
     * The caller must guarantee that at least 4 bytes are free.
     */
    private void writeCodePoint(int utf32) {
        if (utf32 < NEED_2_BYTES) {
            bBuffer[bIndex++] = (byte) utf32;
            return;
        }
        // Lead byte first, then the trailing bytes from most to least significant.
        if (utf32 < NEED_3_BYTES) {
            bBuffer[bIndex++] = (byte) (0xC0 | (utf32 >> 6));
        } else {
            if (utf32 < NEED_4_BYTES) {
                bBuffer[bIndex++] = (byte) (0xE0 | (utf32 >> 12));
            } else {
                bBuffer[bIndex++] = (byte) (0xF0 | (utf32 >> 18));
                bBuffer[bIndex++] = trailingByte(utf32 >> 12);
            }
            bBuffer[bIndex++] = trailingByte(utf32 >> 6);
        }
        bBuffer[bIndex++] = trailingByte(utf32);
    }

    /** Builds a trailing byte from the low six bits of {@code bits}. */
    private static byte trailingByte(int bits) {
        return (byte) (TRAILING_TOP | (bits & TRAILING_BOTTOM_MASK));
    }

    /** Hands the buffer to the stream if fewer than 4 bytes are guaranteed free. */
    private void makeRoom() throws IOException {
        if (bIndex > bSafeEnd) {
            drain();
        }
    }

    /** Hands any buffered bytes to the underlying stream and empties the buffer. */
    private void drain() throws IOException {
        if (bIndex != 0) {
            output.write(bBuffer, 0, bIndex);
            bIndex = 0;
        }
    }

    /** Emits a pending unpaired high surrogate, then writes out all buffered bytes. */
    private void internalFlush() throws IOException {
        if (highSurrogate != 0) {
            // Compare against the safe end, not the physical end: the pending
            // surrogate needs 3 free bytes.
            makeRoom();
            writeCodePoint(highSurrogate);
            highSurrogate = 0;
        }
        drain();
    }

    /**
     * Flushes buffered data and closes the underlying stream. The stream is closed
     * even if flushing fails.
     *
     * @throws IOException if flushing or closing the underlying stream fails
     */
    public void close() throws IOException {
        try {
            internalFlush();
        } finally {
            output.close();
        }
    }

    /**
     * Writes all buffered data (including a pending unpaired high surrogate) to the
     * underlying stream and flushes that stream.
     *
     * @throws IOException if the underlying stream fails
     */
    public void flush() throws IOException {
        internalFlush();
        output.flush();
    }
}