ICU-10944 Extend ByteBuffer support in ICUBinary and Trie classes.

This new functionality is needed for ByteBuffer support in
Normalizer2Impl and BreakIterator.

R=markus.icu@gmail.com

Review URL: https://codereview.appspot.com/107620044

X-SVN-Rev: 36033
This commit is contained in:
Fredrik Roubert 2014-07-15 20:19:55 +00:00
parent 73071730b9
commit 08b5fb11b0
5 changed files with 133 additions and 27 deletions

View File

@ -1,15 +1,16 @@
/*
******************************************************************************
* Copyright (C) 1996-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*/
******************************************************************************
* Copyright (C) 1996-2014, International Business Machines Corporation and
* others. All Rights Reserved.
******************************************************************************
*/
package com.ibm.icu.impl;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import com.ibm.icu.text.UTF16;
@ -47,6 +48,23 @@ public class CharTrie extends Trie
}
}
/**
* <p>Creates a new Trie with the settings for the trie data.</p>
* <p>Unserialize the 32-bit-aligned input buffer and use the data for the
* trie.</p>
* @param bytes data of an ICU data file, containing the trie
* @param dataManipulate object which provides methods to parse the char
* data
*/
public CharTrie(ByteBuffer bytes, DataManipulate dataManipulate) {
super(bytes, dataManipulate);
if (!isCharTrie()) {
throw new IllegalArgumentException(
"Data given does not belong to a char trie.");
}
}
/**
* Make a dummy CharTrie.
* A dummy trie is an empty runtime trie, used when a real data trie cannot
@ -253,7 +271,23 @@ public class CharTrie extends Trie
m_data_ = m_index_;
m_initialValue_ = m_data_[m_dataOffset_];
}
/**
 * <p>Reads the trie content from the byte buffer into one char array that
 * serves as both the index and the data array.</p>
 * <p>Exactly {@code m_dataOffset_ + m_dataLength_} chars are consumed from
 * the buffer, starting at its current position.</p>
 * @param bytes buffer containing trie data
 */
protected final void unserialize(ByteBuffer bytes)
{
    final int unitCount = m_dataOffset_ + m_dataLength_;
    final char[] units = new char[unitCount];
    // Publish the array before filling it, as the field was assigned first
    // in the original code.
    m_index_ = units;

    int next = 0;
    while (next < unitCount) {
        units[next] = bytes.getChar();
        next++;
    }

    // Index and data share the same backing array; the data section starts
    // at m_dataOffset_, and its first unit is the initial value.
    m_data_ = units;
    m_initialValue_ = units[m_dataOffset_];
}
/**
* Gets the offset to the data which the surrogate pair points to.
* @param lead lead surrogate

View File

@ -4,6 +4,7 @@
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.impl;
import java.io.DataInputStream;
@ -148,6 +149,16 @@ public final class ICUBinary
dataVersion[2], dataVersion[3]);
}
/**
 * Reads the ICU data header via readHeader() and converts the compact int
 * it returns into a VersionInfo.
 *
 * @param bytes buffer passed through to readHeader()
 * @param dataFormat data format passed through to readHeader()
 * @param authenticate authenticator passed through to readHeader()
 * @return the data version as a VersionInfo
 * @throws IOException declared because readHeader() is invoked
 */
public static final VersionInfo readHeaderAndDataVersion(ByteBuffer bytes,
        int dataFormat,
        Authenticate authenticate)
        throws IOException {
    final int compactVersion = readHeader(bytes, dataFormat, authenticate);
    return getVersionInfoFromCompactInt(compactVersion);
}
/**
* Reads an ICU data header, checks the data format, and returns the data version.
*

View File

@ -1,15 +1,16 @@
/*
******************************************************************************
* Copyright (C) 1996-2011, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*/
******************************************************************************
* Copyright (C) 1996-2014, International Business Machines Corporation and
* others. All Rights Reserved.
******************************************************************************
*/
package com.ibm.icu.impl;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Arrays;
import com.ibm.icu.lang.UCharacter;
@ -169,7 +170,35 @@ public abstract class Trie
m_dataLength_ = input.readInt();
unserialize(inputStream);
}
/**
* Trie constructor for CharTrie use.
* @param bytes data of an ICU data file, containing the trie
* @param dataManipulate object containing the information to parse the
* trie data
*/
protected Trie(ByteBuffer bytes, DataManipulate dataManipulate)
{
// Magic number to authenticate the data.
int signature = bytes.getInt();
m_options_ = bytes.getInt();
if (!checkHeader(signature)) {
throw new IllegalArgumentException("ICU data file error: Trie header authentication failed, please check if you have the most updated ICU data file");
}
if(dataManipulate != null) {
m_dataManipulate_ = dataManipulate;
} else {
m_dataManipulate_ = new DefaultGetFoldingOffset();
}
m_isLatin1Linear_ = (m_options_ &
HEADER_OPTIONS_LATIN1_IS_LINEAR_MASK_) != 0;
m_dataOffset_ = bytes.getInt();
m_dataLength_ = bytes.getInt();
unserialize(bytes);
}
/**
* Trie constructor
* @param index array to be used for index
@ -376,6 +405,20 @@ public abstract class Trie
}
}
/**
 * <p>Parses the byte buffer and creates the trie index with it.</p>
 * <p>The position of the input ByteBuffer must be right after the trie
 * header. {@code m_dataOffset_} chars are consumed from the buffer.</p>
 * <p>This is overridden by the child classes.</p>
 * @param bytes buffer containing trie data
 */
protected void unserialize(ByteBuffer bytes)
{
    final int indexLength = m_dataOffset_;
    final char[] index = new char[indexLength];
    // Assign the field before filling the array, as in the original code.
    m_index_ = index;

    int next = 0;
    while (next < indexLength) {
        index[next] = bytes.getChar();
        next++;
    }
}
/**
* Determines if this is a 32 bit trie
* @return true if options specifies this is a 32 bit trie

View File

@ -1,15 +1,17 @@
/*
*******************************************************************************
* Copyright (C) 2009-2010, International Business Machines Corporation and
* Copyright (C) 2009-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.impl;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
/**
@ -55,6 +57,25 @@ public final class Trie2_16 extends Trie2 {
return (Trie2_16) Trie2.createFromSerialized(is);
}
/**
 * Create a Trie2_16 from its serialized form. Inverse of utrie2_serialize().
 *
 * The serialized format is identical between ICU4C and ICU4J, so this
 * function works with serialized Trie2s from either.
 *
 * The serialized Trie2 in the bytes may be in either little or big endian
 * byte order. This allows using serialized Tries from ICU4C without needing
 * to consider the byte order of the system that created them.
 *
 * The work is delegated to Trie2.createFromSerialized(ByteBuffer); the
 * result is then cast to Trie2_16.
 *
 * @param bytes a byte buffer to the serialized form of a UTrie2.
 * @return An unserialized Trie2_16, ready for use.
 * @throws IllegalArgumentException if the buffer does not contain a serialized Trie2.
 * @throws IOException if a read error occurs in the buffer.
 * @throws ClassCastException if the bytes contain a serialized Trie2_32
 */
public static Trie2_16 createFromSerialized(ByteBuffer bytes) throws IOException {
    final Trie2 parsed = Trie2.createFromSerialized(bytes);
    return (Trie2_16) parsed;
}
/**
* Get the value for a code point as stored in the Trie2.
*

View File

@ -1,18 +1,21 @@
/*
*******************************************************************************
* Copyright (C) 2009, International Business Machines Corporation and *
* others. All Rights Reserved. *
* Copyright (C) 2009-2014, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.dev.test.util;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Iterator;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.ICUBinary;
import com.ibm.icu.impl.Trie2;
import com.ibm.icu.impl.Trie2Writable;
import com.ibm.icu.impl.Trie2_16;
@ -212,15 +215,13 @@ public class Trie2Test extends TestFmwk {
ByteArrayOutputStream os = new ByteArrayOutputStream();
try {
frozen16.serialize(os);
ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
Trie2 unserialized16 = Trie2.createFromSerialized(is);
Trie2 unserialized16 = Trie2.createFromSerialized(ByteBuffer.wrap(os.toByteArray()));
assertEquals("", trie, unserialized16);
assertEquals("", Trie2_16.class, unserialized16.getClass());
os.reset();
frozen32.serialize(os);
is = new ByteArrayInputStream(os.toByteArray());
Trie2 unserialized32 = Trie2.createFromSerialized(is);
Trie2 unserialized32 = Trie2.createFromSerialized(ByteBuffer.wrap(os.toByteArray()));
assertEquals("", trie, unserialized32);
assertEquals("", Trie2_32.class, unserialized32.getClass());
} catch (IOException e) {
@ -337,8 +338,7 @@ public class Trie2Test extends TestFmwk {
// Fragile test. Serialized length could change with changes to compaction.
// But it should not change unexpectedly.
assertEquals("", 3508, serializedLen);
ByteArrayInputStream is = new ByteArrayInputStream(os.toByteArray());
Trie2 t1ws16 = Trie2.createFromSerialized(is);
Trie2 t1ws16 = Trie2.createFromSerialized(ByteBuffer.wrap(os.toByteArray()));
assertEquals("", t1ws16.getClass(), Trie2_16.class);
assertEquals("", t1w, t1ws16);
@ -348,8 +348,7 @@ public class Trie2Test extends TestFmwk {
// Fragile test. Serialized length could change with changes to compaction.
// But it should not change unexpectedly.
assertEquals("", 4332, serializedLen);
is = new ByteArrayInputStream(os.toByteArray());
Trie2 t1ws32 = Trie2.createFromSerialized(is);
Trie2 t1ws32 = Trie2.createFromSerialized(ByteBuffer.wrap(os.toByteArray()));
assertEquals("", t1ws32.getClass(), Trie2_32.class);
assertEquals("", t1w, t1ws32);
} catch (IOException e) {
@ -716,13 +715,11 @@ public class Trie2Test extends TestFmwk {
String fileName32 = "Trie2Test." + serializedName + ".32.tri2";
InputStream is = Trie2Test.class.getResourceAsStream(fileName16);
Trie2 trie16 = Trie2.createFromSerialized(is);
is.close();
Trie2 trie16 = Trie2.createFromSerialized(ICUBinary.getByteBufferFromInputStream(is));
trieGettersTest(testName, trie16, checkRanges);
is = Trie2Test.class.getResourceAsStream(fileName32);
Trie2 trie32 = Trie2.createFromSerialized(is);
is.close();
Trie2 trie32 = Trie2.createFromSerialized(ICUBinary.getByteBufferFromInputStream(is));
trieGettersTest(testName, trie32, checkRanges);