scuffed-code/icu4c/source/tools/toolutil/ucbuf.h

/*
*******************************************************************************
*
*   Copyright (C) 1998-2001, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* File ucbuf.h
*
* Modification History:
*
*   Date        Name        Description
*   05/10/01    Ram         Creation.
*
* This API reads in files and returns UChars
*******************************************************************************
*/

#include "unicode/utypes.h"
#include "unicode/ucnv.h"
#include "filestrm.h"
#include "cmemory.h"
#include <stdio.h>

#ifndef UCBUF_H
#define UCBUF_H 1

/**
 * Opaque handle used by every ucbuf_* function declared below.
 * Only the struct tag is declared here; the structure's members are not
 * defined in this header.
 */
typedef struct UCHARBUF UCHARBUF;
/**
 * End of file value.
 * NOTE(review): this hexadecimal literal has an unsigned type where int is
 * 32 bits wide, while the ucbuf_getc* functions return int32_t; comparisons
 * against it rely on the usual arithmetic conversions -- confirm callers
 * compare consistently.
 */
#define U_EOF 0xFFFFFFFF
/**
 * Error value if a sequence cannot be unescaped.
 * NOTE(review): presumably produced by ucbuf_getcx32, the only unescaping
 * reader declared in this header -- confirm against the implementation.
 */
#define U_ERR 0xFFFFFFFE

/**
 * Pairs a UChar pointer with a 32-bit signed length.
 * NOTE(review): nothing in this header uses ULine, so the field meanings
 * noted below are assumptions -- confirm against the code that uses it.
 */
typedef struct ULine ULine;

struct  ULine {
    UChar     *name;  /* presumably the start of the line's text -- TODO confirm */
    int32_t   len;    /* presumably the length of that text in UChars -- TODO confirm */
};

/**
 * Opens a UCHARBUF on the named file, using the given code page for conversion.
 * @param fileName  Name of the file to open.
 * @param codepage  The encoding of the file stream to convert to Unicode.
 *                  If *codepage is NULL on input the API will try to autodetect
 *                  popular Unicode encodings.
 * @param showWarning Flag to print out warnings to STDOUT
 * @param buffered  If TRUE performs a buffered read of the input file. If FALSE reads
 *                  the whole file into memory and converts it.
 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
 *        indicates a failure on entry, the function will immediately return.
 *        On exit the value will indicate the success of the operation.
 * @return pointer to the newly opened UCHARBUF
 */
U_CAPI UCHARBUF* U_EXPORT2
ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);

/**
 * Gets a UTF-16 code unit at the current position from the converted buffer
 * and increments the current position.
 * @param buf Pointer to UCHARBUF structure
 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
 *        indicates a failure on entry, the function will immediately return.
 *        On exit the value will indicate the success of the operation.
 * @return the code unit read, as an int32_t.
 *         NOTE(review): presumably U_EOF once the input is exhausted -- confirm
 *         against the implementation.
 */
U_CAPI int32_t U_EXPORT2
ucbuf_getc(UCHARBUF* buf,UErrorCode* err);

/**
 * Gets a UTF-32 code point at the current position from the converted buffer
 * and increments the current position.
 * @param buf Pointer to UCHARBUF structure
 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
 *        indicates a failure on entry, the function will immediately return.
 *        On exit the value will indicate the success of the operation.
 * @return the code point read, as an int32_t.
 *         NOTE(review): presumably U_EOF once the input is exhausted -- confirm
 *         against the implementation.
 */
U_CAPI int32_t U_EXPORT2
ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);

/**
 * Gets a UTF-16 code unit at the current position from the converted buffer after
 * unescaping and increments the current position. If the escape sequence is for a
 * UTF-32 code point (\\Uxxxxxxxx) then a UTF-32 code point is returned.
 * @param buf Pointer to UCHARBUF structure
 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
 *        indicates a failure on entry, the function will immediately return.
 *        On exit the value will indicate the success of the operation.
 * @return the unescaped code unit or code point, as an int32_t.
 *         NOTE(review): presumably U_ERR when a sequence cannot be unescaped and
 *         U_EOF at end of input -- confirm against the implementation.
 */
U_CAPI int32_t U_EXPORT2
ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);

/**
 * Gets a pointer to the current position in the internal buffer and the length of the line.
 * It is imperative to make a copy of the returned buffer before performing operations on it.
 * @param buf Pointer to UCHARBUF structure
 * @param len Output param to receive the length of the buffer returned, up to the end of the line
 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
 *        indicates a failure on entry, the function will immediately return.
 *        On exit the value will indicate the success of the operation.
 *        Error: U_TRUNCATED_CHAR_FOUND
 * @return Pointer to the internal buffer
 */
U_CAPI const UChar* U_EXPORT2
ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);


/**
 * Resets the buffers and the underlying file stream.
 * Returns nothing; the outcome is reported only through err.
 * @param buf Pointer to UCHARBUF structure
 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
 *        indicates a failure on entry, the function will immediately return.
 *        On exit the value will indicate the success of the operation.
 */
U_CAPI void U_EXPORT2
ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);

/**
 * Returns a pointer to the internal converted buffer.
 * @param buf Pointer to UCHARBUF structure
 * @param len Pointer to int32_t to receive the length of the buffer
 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
 *        indicates a failure on entry, the function will immediately return.
 *        On exit the value will indicate the success of the operation.
 * @return Pointer to internal UChar buffer
 */
U_CAPI const UChar* U_EXPORT2
ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);

/**
 * Closes the UCHARBUF structure members and cleans up the malloc'ed memory.
 * Takes no UErrorCode and returns nothing, so no status is reported to the caller.
 * @param buf Pointer to UCHARBUF structure
 */
U_CAPI void U_EXPORT2
ucbuf_close(UCHARBUF* buf);

/**
 * Rewinds the buffer by one codepoint.
 * Note the argument order: unlike the other functions in this header, the
 * UCHARBUF pointer comes last.
 * @param ungetChar NOTE(review): presumably the code point that was most
 *        recently read and is being pushed back -- confirm against the
 *        implementation.
 * @param buf Pointer to UCHARBUF structure
 */
U_CAPI void U_EXPORT2
ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);


/**
 * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
 * is necessary.
 * If the charset was autodetected, the caller must close both the input FileStream
 * and the converter.
 *
 * @param fileName The name of the file to be opened and whose encoding is to be autodetected
 * @param cp Output param to receive the detected encoding
 * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
 * @param signatureLength NOTE(review): presumably an output param receiving the
 *        length in bytes of the detected signature -- confirm against the implementation.
 * @param error is a pointer to a valid <code>UErrorCode</code> value. If this value
 *        indicates a failure on entry, the function will immediately return.
 *        On exit the value will indicate the success of the operation.
 * @return The input FileStream if its charset was autodetected; NULL otherwise.
 */
U_CAPI FileStream * U_EXPORT2
ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error);

/**
 * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
 * is necessary.
 * If the charset was autodetected, the caller must close the converter.
 *
 * @param in The file stream whose encoding is to be detected
 * @param cp Output param to receive the detected encoding
 * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
 * @param signatureLength NOTE(review): presumably an output param receiving the
 *        length in bytes of the detected signature -- confirm against the implementation.
 * @param error is a pointer to a valid <code>UErrorCode</code> value. If this value
 *        indicates a failure on entry, the function will immediately return.
 *        On exit the value will indicate the success of the operation.
 * @return Boolean whether the Unicode charset was autodetected.
 */

U_CAPI UBool U_EXPORT2
ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error);

/**
 * Returns the approximate size in UChars required for converting the file to UChars.
 * @param buf Pointer to UCHARBUF structure
 * @return the approximate number of UChars, as an int32_t
 */
U_CAPI int32_t U_EXPORT2
ucbuf_size(UCHARBUF* buf);

/**
 * NOTE(review): this declaration had no documentation; everything below is
 * inferred from the parameter names only and must be confirmed against the
 * implementation.
 * Presumably builds a path for fileName relative to inputDir and writes it
 * into the caller-supplied target buffer.
 * @param inputDir presumably the directory to resolve fileName against -- TODO confirm
 * @param fileName presumably the file name to resolve -- TODO confirm
 * @param target   caller-supplied char buffer; presumably receives the resolved name
 * @param len      pointer to an int32_t; presumably the capacity of target on
 *                 input and/or the resulting length on output -- TODO confirm
 * @param status   pointer to a UErrorCode, presumably used like the err
 *                 parameters of the other functions in this header
 * @return a const char pointer; presumably the resolved file name -- TODO confirm
 */
U_CAPI const char* U_EXPORT2
ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status);

#endif
ICU-954 Remove genrb's dependency on ustdio X-SVN-Rev: 4634 2001-05-10 16:54:09 +00:00			`/*`
			`*******************************************************************************`
			`*`
			`* Copyright (C) 1998-2001, International Business Machines`
			`* Corporation and others. All Rights Reserved.`
			`*`
			`*******************************************************************************`
			`*`
			`* File ucbuf.c`
			`*`
			`* Modification History:`
			`*`
			`* Date Name Description`
			`* 05/10/01 Ram Creation.`
ICU-954 Move ucbuf to toolutil. X-SVN-Rev: 4682 2001-05-16 01:09:06 +00:00			`*`
			`* This API reads in files and returns UChars`
ICU-954 Remove genrb's dependency on ustdio X-SVN-Rev: 4634 2001-05-10 16:54:09 +00:00			`*******************************************************************************`
			`*/`

			`#include "unicode/utypes.h"`
			`#include "unicode/ucnv.h"`
			`#include "filestrm.h"`
			`#include "cmemory.h"`
			`#include <stdio.h>`

			`#ifndef UCBUF_H`
			`#define UCBUF_H 1`

			`typedef struct UCHARBUF UCHARBUF;`
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`/**`
			`* End of file value`
			`*/`
ICU-954 Swap definitions of U_EOF and U_ERR to synch with ustdio X-SVN-Rev: 5335 2001-07-24 23:15:31 +00:00			`#define U_EOF 0xFFFFFFFF`
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`/**`
			`* Error value if a sequence cannot be unescaped`
			`*/`
ICU-954 Swap definitions of U_EOF and U_ERR to synch with ustdio X-SVN-Rev: 5335 2001-07-24 23:15:31 +00:00			`#define U_ERR 0xFFFFFFFE`
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00
			`typedef struct ULine ULine;`

			`struct ULine {`
			`UChar *name;`
			`int32_t len;`
			`};`

			`/**`
			`* Opens the UCHARBUF with the given file stream and code page for conversion`
			`* @param fileName Name of the file to open.`
			`* @param codepage The encoding of the file stream to convert to Unicode.`
			`* If *codepoge is NULL on input the API will try to autodetect`
			`* popular Unicode encodings`
			`* @param showWarning Flag to print out warnings to STDOUT`
			`* @param buffered If TRUE performs a buffered read of the input file. If FALSE reads`
			`* the whole file into memory and converts it.`
			`* @param err is a pointer to a valid <code>UErrorCode</code> value. If this value`
			`* indicates a failure on entry, the function will immediately return.`
			`* On exit the value will indicate the success of the operation.`
			`* @return pointer to the newly opened UCHARBUF`
			`*/`
			`U_CAPI UCHARBUF* U_EXPORT2`
			`ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);`

			`/**`
			`* Gets a UTF-16 code unit at the current position from the converted buffer`
			`* and increments the current position`
			`* @param buf Pointer to UCHARBUF structure`
			`* @param err is a pointer to a valid <code>UErrorCode</code> value. If this value`
			`* indicates a failure on entry, the function will immediately return.`
			`* On exit the value will indicate the success of the operation.`
			`*/`
			`U_CAPI int32_t U_EXPORT2`
ICU-954 Fix for HPUX, untabify and coding guidelines changes. X-SVN-Rev: 4641 2001-05-10 21:43:01 +00:00			`ucbuf_getc(UCHARBUF* buf,UErrorCode* err);`

ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`/**`
			`* Gets a UTF-32 code point at the current position from the converted buffer`
			`* and increments the current position`
			`* @param buf Pointer to UCHARBUF structure`
			`* @param err is a pointer to a valid <code>UErrorCode</code> value. If this value`
			`* indicates a failure on entry, the function will immediately return.`
			`* On exit the value will indicate the success of the operation.`
			`*/`
			`U_CAPI int32_t U_EXPORT2`
			`ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);`

			`/**`
			`* Gets a UTF-16 code unit at the current position from the converted buffer after`
			`* unescaping and increments the current position. If the escape sequence is for UTF-32`
			`* code point (\\Uxxxxxxxx) then a UTF-32 codepoint is returned`
			`* @param buf Pointer to UCHARBUF structure`
			`* @param err is a pointer to a valid <code>UErrorCode</code> value. If this value`
			`* indicates a failure on entry, the function will immediately return.`
			`* On exit the value will indicate the success of the operation.`
			`*/`
			`U_CAPI int32_t U_EXPORT2`
			`ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);`

			`/**`
			`* Gets a pointer to the current position in the internal buffer and length of the line.`
			`* It imperative to make a copy of the returned buffere before performing operations on it.`
			`* @param buf Pointer to UCHARBUF structure`
			`* @param len Output param to receive the len of the buffer returned till end of the line`
			`* @param err is a pointer to a valid <code>UErrorCode</code> value. If this value`
			`* indicates a failure on entry, the function will immediately return.`
			`* On exit the value will indicate the success of the operation.`
			`* Error: U_TRUNCATED_CHAR_FOUND`
			`* @return Pointer to the internal buffer`
			`*/`
			`U_CAPI const UChar* U_EXPORT2`
			`ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);`

ICU-954 Fix for HPUX, untabify and coding guidelines changes. X-SVN-Rev: 4641 2001-05-10 21:43:01 +00:00
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`/**`
			`* Resets the buffers and the underlying file stream.`
			`* @param buf Pointer to UCHARBUF structure`
			`* @param err is a pointer to a valid <code>UErrorCode</code> value. If this value`
			`* indicates a failure on entry, the function will immediately return.`
			`* On exit the value will indicate the success of the operation.`
			`*/`
ICU-954 Move ucbuf to toolutil. X-SVN-Rev: 4682 2001-05-16 01:09:06 +00:00			`U_CAPI void U_EXPORT2`
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);`
ICU-954 Fix for HPUX, untabify and coding guidelines changes. X-SVN-Rev: 4641 2001-05-10 21:43:01 +00:00
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`/**`
			`* Returns a pointer to the internal converted buffer`
			`* @param buf Pointer to UCHARBUF structure`
			`* @param len Pointer to int32_t to receive the lenth of buffer`
			`* @param err is a pointer to a valid <code>UErrorCode</code> value. If this value`
			`* indicates a failure on entry, the function will immediately return.`
			`* On exit the value will indicate the success of the operation.`
			`* @return Pointer to internal UChar buffer`
			`*/`
			`U_CAPI const UChar* U_EXPORT2`
			`ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);`
ICU-954 Fix for HPUX, untabify and coding guidelines changes. X-SVN-Rev: 4641 2001-05-10 21:43:01 +00:00
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`/**`
			`* Closes the UCHARBUF structure members and cleans up the malloc'ed memory`
			`* @param buf Pointer to UCHARBUF structure`
			`*/`
ICU-954 Move ucbuf to toolutil. X-SVN-Rev: 4682 2001-05-16 01:09:06 +00:00			`U_CAPI void U_EXPORT2`
ICU-954 Fix for HPUX, untabify and coding guidelines changes. X-SVN-Rev: 4641 2001-05-10 21:43:01 +00:00			`ucbuf_close(UCHARBUF* buf);`

ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`/**`
			`* Rewinds the buffer by one codepoint`
			`*/`
ICU-954 Move ucbuf to toolutil. X-SVN-Rev: 4682 2001-05-16 01:09:06 +00:00			`U_CAPI void U_EXPORT2`
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);`

ICU-954 Fix for HPUX, untabify and coding guidelines changes. X-SVN-Rev: 4641 2001-05-10 21:43:01 +00:00
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`/**`
ICU-1849 properly detect UTF-7 signature(s) X-SVN-Rev: 10202 2002-11-08 01:28:14 +00:00			`* Autodetects the encoding of the file stream. Only Unicode charsets are autodectected.`
			`* Some Unicode charsets are stateful and need byte identifiers to be converted also to bring`
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`* the converter to correct state for converting the rest of the stream. So the UConverter parameter`
			`* is necessary.`
ICU-1849 properly detect UTF-7 signature(s) X-SVN-Rev: 10202 2002-11-08 01:28:14 +00:00			`* If the charset was autodetected, the caller must close both the input FileStream`
			`* and the converter.`
			`*`
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`* @param fileName The file name to be opened and encoding autodected`
ICU-1849 properly detect UTF-7 signature(s) X-SVN-Rev: 10202 2002-11-08 01:28:14 +00:00			`* @param conv Output param to receive the opened converter if autodetected; NULL otherwise.`
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`* @param cp Output param to receive the detected encoding`
			`* @param err is a pointer to a valid <code>UErrorCode</code> value. If this value`
			`* indicates a failure on entry, the function will immediately return.`
			`* On exit the value will indicate the success of the operation.`
ICU-1849 properly detect UTF-7 signature(s) X-SVN-Rev: 10202 2002-11-08 01:28:14 +00:00			`* @return The input FileStream if its charset was autodetected; NULL otherwise.`
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`*/`
ICU-1849 properly detect UTF-7 signature(s) X-SVN-Rev: 10202 2002-11-08 01:28:14 +00:00			`U_CAPI FileStream * U_EXPORT2`
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`ucbuf_autodetect(const char* fileName, const char cp,UConverter conv, int32_t* signatureLength,UErrorCode* error);`

			`/**`
ICU-1849 properly detect UTF-7 signature(s) X-SVN-Rev: 10202 2002-11-08 01:28:14 +00:00			`* Autodetects the encoding of the file stream. Only Unicode charsets are autodectected.`
			`* Some Unicode charsets are stateful and need byte identifiers to be converted also to bring`
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`* the converter to correct state for converting the rest of the stream. So the UConverter parameter`
			`* is necessary.`
ICU-1849 properly detect UTF-7 signature(s) X-SVN-Rev: 10202 2002-11-08 01:28:14 +00:00			`* If the charset was autodetected, the caller must close the converter.`
			`*`
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`* @param fileStream The file stream whose encoding is to be detected`
ICU-1849 properly detect UTF-7 signature(s) X-SVN-Rev: 10202 2002-11-08 01:28:14 +00:00			`* @param conv Output param to receive the opened converter if autodetected; NULL otherwise.`
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`* @param cp Output param to receive the detected encoding`
			`* @param err is a pointer to a valid <code>UErrorCode</code> value. If this value`
			`* indicates a failure on entry, the function will immediately return.`
			`* On exit the value will indicate the success of the operation.`
ICU-1849 properly detect UTF-7 signature(s) X-SVN-Rev: 10202 2002-11-08 01:28:14 +00:00			`* @return Boolean whether the Unicode charset was autodetected.`
ICU-2363 Performance test take 1 X-SVN-Rev: 10007 2002-10-10 01:04:15 +00:00			`*/`

			`U_CAPI UBool U_EXPORT2`
			`ucbuf_autodetect_fs(FileStream* in, const char cp, UConverter conv, int32_t* signatureLength, UErrorCode* error);`

			`/**`
			`* Returns the approximate size in UChars required for converting the file to UChars`
			`*/`
			`U_CAPI int32_t U_EXPORT2`
			`ucbuf_size(UCHARBUF* buf);`

			`U_CAPI const char* U_EXPORT2`
			`ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status);`
ICU-954 Remove genrb's dependency on ustdio X-SVN-Rev: 4634 2001-05-10 16:54:09 +00:00
			`#endif`