ICU-954 Remove genrb's dependency on ustdio

X-SVN-Rev: 4634
This commit is contained in:
Ram Viswanadha 2001-05-10 16:54:09 +00:00
parent bab001e8f6
commit bd00e1fed4
9 changed files with 377 additions and 115 deletions

View File

@ -50,7 +50,7 @@ LIBS = $(LIBUSTDIO) $(LIBICUI18N) $(LIBICUTOOLUTIL) $(LIBICUUC) @LIBS@ @LIB_M@
##OBJECTS = error.o genrb.o ustr.o parse.o read.o write.o list.o \
##rblist.o util.o
OBJECTS = error.o genrb.o parse.o read.o reslist.o ustr.o util.o
OBJECTS = error.o genrb.o parse.o read.o reslist.o ustr.o util.o ucbuf.o
DEPS = $(OBJECTS:.o=.d)

View File

@ -12,17 +12,17 @@
*
* Date Name Description
* 05/25/99 stephen Creation.
* 5/10/01 Ram removed ustdio dependency
*******************************************************************************
*/
#include <stdio.h>
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "ucbuf.h"
U_CDECL_BEGIN
extern int32_t lineCount;
@ -180,6 +180,7 @@ processFile(const char *filename, const char *cp, const char *inputDir, const ch
FileStream *in;
struct SRBRoot *data;
char *rbname;
UCHARBUF* ucbuf;
if(U_FAILURE(*status)) return;
@ -213,28 +214,14 @@ processFile(const char *filename, const char *cp, const char *inputDir, const ch
setErrorText("File not found");
return;
} else { /* auto detect popular encodings */
UBool autodetect = FALSE;
char start[3];
T_FileStream_read(in, start, 3);
if(start[0] == '\xFE' && start[1] == '\xFF') {
cp = "UTF16_BigEndian";
autodetect = TRUE;
} else if(start[0] == '\xFF' && start[1] == '\xFE') {
cp = "UTF16_LittleEndian";
autodetect = TRUE;
} else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
cp = "UTF8";
autodetect = TRUE;
}
T_FileStream_rewind(in);
if(autodetect == TRUE) {
if(ucbuf_autodetect(in,&cp)) {
printf("Autodetected encoding %s\n", cp);
}
}
ucbuf = ucbuf_open(in,cp,status);
/* Parse the data into an SRBRoot */
data = parse(in, cp, inputDir, status);
data = parse(ucbuf, cp, inputDir, status);
/* Determine the target rb filename */
rbname = make_res_filename(filename, outputDir, status);
@ -252,6 +239,7 @@ processFile(const char *filename, const char *cp, const char *inputDir, const ch
finish:
ucbuf_close(ucbuf);
/* Clean up */
T_FileStream_close(in);

View File

@ -50,7 +50,7 @@ BSC32=bscmake.exe
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
# ADD LINK32 icuin.lib icuuc.lib icuio.lib icutu.lib /nologo /subsystem:console /machine:I386 /libpath:"..\toolutil\Release" /libpath:"..\..\..\lib"
# ADD LINK32 icuin.lib icuuc.lib icutu.lib /nologo /subsystem:console /machine:I386 /libpath:"..\toolutil\Release" /libpath:"..\..\..\lib"
# Begin Custom Build
InputPath=.\Release\genrb.exe
InputName=genrb
@ -83,7 +83,7 @@ BSC32=bscmake.exe
# ADD BSC32 /nologo
LINK32=link.exe
# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
# ADD LINK32 icuind.lib icuucd.lib icuiod.lib icutud.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\toolutil\Debug" /libpath:"..\..\..\lib"
# ADD LINK32 icuind.lib icuucd.lib icutud.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\toolutil\Debug" /libpath:"..\..\..\lib"
# Begin Custom Build
InputPath=.\Debug\genrb.exe
InputName=genrb
@ -125,6 +125,10 @@ SOURCE=.\reslist.c
# End Source File
# Begin Source File
SOURCE=.\ucbuf.c
# End Source File
# Begin Source File
SOURCE=.\ustr.c
# End Source File
# Begin Source File
@ -153,6 +157,10 @@ SOURCE=.\reslist.h
# End Source File
# Begin Source File
SOURCE=.\ucbuf.h
# End Source File
# Begin Source File
SOURCE=.\ustr.h
# End Source File
# Begin Source File

View File

@ -13,6 +13,7 @@
* Date Name Description
* 05/26/99 stephen Creation.
* 02/25/00 weiv Overhaul to write udata
* 5/10/01 Ram removed ustdio dependency
*******************************************************************************
*/
@ -22,7 +23,6 @@
#include "uhash.h"
#include "cmemory.h"
#include "read.h"
#include "unicode/ustdio.h"
#include "ustr.h"
#include "reslist.h"
#include "unicode/ustring.h"
@ -179,19 +179,19 @@ static UBool compareUString(const void* ustr1, const void* ustr2) {
((struct UString*)ustr2)->fChars);
}
static char *getModificationData(struct UFILE *file, UErrorCode *status) {
static char *getModificationData(UCHARBUF* buf, UErrorCode *status) {
enum ETokenType modType;
struct UString modToken;
char *retValue = NULL;
ustr_init(&modToken);
modType = getNextToken(file, &modToken, status);
modType = getNextToken(buf, &modToken, status);
if(U_SUCCESS(*status) && modType == tok_open_brace) {
modType = getNextToken(file, &modToken, status);
modType = getNextToken(buf, &modToken, status);
if(U_SUCCESS(*status) && modType == tok_string) {
retValue = uprv_malloc(u_strlen(modToken.fChars)+1);
u_UCharsToChars(modToken.fChars, retValue, u_strlen(modToken.fChars)+1);
modType = getNextToken(file, &modToken, status);
modType = getNextToken(buf, &modToken, status);
if(U_SUCCESS(*status) && modType == tok_close_brace) {
return retValue;
} else {
@ -212,10 +212,9 @@ int32_t lineCount = 0;
char lastTag[200] = "";
struct SRBRoot*
parse(FileStream *f, const char *cp, const char *inputDir,
parse(UCHARBUF* buf, const char *cp, const char *inputDir,
UErrorCode *status)
{
struct UFILE *file;
enum ETokenType type;
enum ENode node;
struct STransition t;
@ -261,10 +260,9 @@ parse(FileStream *f, const char *cp, const char *inputDir,
node = eInitial;
data = 0;
file = u_finit((FILE *)f, 0, cp);
lineCount = 1;
/* file = u_finit(f, cp, status); */
if(file == NULL) {
if(buf == NULL) {
setErrorText("Could not initialize input file - most probably because of wrong converter\n");
*status = U_INVALID_FORMAT_ERROR;
goto finish;
@ -273,7 +271,7 @@ parse(FileStream *f, const char *cp, const char *inputDir,
bundle = bundle_open(status);
rootTable = bundle -> fRoot;
if(U_FAILURE(*status) || file == NULL) {
if(U_FAILURE(*status) || buf == NULL) {
goto finish;
}
@ -282,7 +280,7 @@ parse(FileStream *f, const char *cp, const char *inputDir,
/* Collation tailoring rules version */
UVersionInfo version;
/* get next token from stream */
type = getNextToken(file, &token, status);
type = getNextToken(buf, &token, status);
if(U_FAILURE(*status)) {
goto finish;
}
@ -305,6 +303,7 @@ parse(FileStream *f, const char *cp, const char *inputDir,
break;
}
t = GETTRANSITION(node, type);
node = t.fNext;
@ -364,7 +363,7 @@ parse(FileStream *f, const char *cp, const char *inputDir,
uint8_t val = 0;
uint8_t *newValue;
fprintf(stdout, "bin\n");
binaryValue = getModificationData(file, status);
binaryValue = getModificationData(buf, status);
if(U_SUCCESS(*status) && binaryValue != NULL) {
/* do the parsing & outputing of the data */
fprintf(stdout, "Will parse binary value %s and store it in tag: %s\n", binaryValue, cTag);
@ -395,7 +394,7 @@ parse(FileStream *f, const char *cp, const char *inputDir,
char *intValue;
int32_t val;
fprintf(stdout, "int\n");
intValue = getModificationData(file, status);
intValue = getModificationData(buf, status);
if(U_SUCCESS(*status) && intValue != NULL) {
/* do the parsing & outputing of the data */
fprintf(stdout, "Will parse integer value %s and store it in tag: %s\n", intValue, cTag);
@ -422,7 +421,7 @@ parse(FileStream *f, const char *cp, const char *inputDir,
uint8_t *binData;
char *fileName;
fprintf(stdout, "import\n");
fileName = getModificationData(file, status);
fileName = getModificationData(buf, status);
if(U_SUCCESS(*status) && fileName != NULL) {
/* do the reading & outputing of the file */
fprintf(stdout, "Will read %s and store it in tag: %s\n", fileName, cTag);
@ -493,16 +492,11 @@ parse(FileStream *f, const char *cp, const char *inputDir,
/* Record a singleton string */
case eStr:
/* check if we have reached here after finding %%UCARULES */
/* check if we have reached here after finding %%UCARULES */
if(ucaEl==TRUE){
UChar *c,*end,*ucaRulesStr;
FileStream *in =NULL;
UFILE* ufile=NULL;
int fileLength = 0;
char fileName[256]={'\0'};
char cs[128] = { '\0'};
const char* cpStr=NULL;
char start[3] ={'0'};
ucaEl=FALSE; /* reset ucaEL */
/* make the fileName including the directory */
uprv_strcat(fileName,inputDir);
@ -513,41 +507,37 @@ parse(FileStream *f, const char *cp, const char *inputDir,
uprv_strcat(fileName, cs);
/* open the file */
in = T_FileStream_open(fileName, "rb");
T_FileStream_read(in, start, 3);
if(start[0] == '\xFE' && start[1] == '\xFF') {
cpStr = "UTF16_BigEndian";
} else if(start[0] == '\xFF' && start[1] == '\xFE') {
cpStr = "UTF16_LittleEndian";
} else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
cpStr = "UTF8";
}
ufile = u_finit((FILE*) in,0, cpStr);
if(in && ufile){
fileLength =T_FileStream_size(in);
ucaRulesStr = (UChar*)uprv_malloc(sizeof(UChar) * fileLength);
c= ucaRulesStr;
end = ucaRulesStr + fileLength/2;
/* read in the rulses */
while(c < end) {
*c++ = u_fgetc(ufile);
}
/* couldn't read all chars */
if(c < end) {
fprintf(stderr, "Error! Couldn't read all chars from input file %s for tag %s\n", fileName, cTag);
}else{
/* Add it to bundle */
temp = string_open(bundle,cTag, ucaRulesStr, fileLength/2, status);
table_add(rootTable, temp, status);
put(data, &tag, status);
if(U_FAILURE(*status)) {
goto finish;
}
temp = NULL;
}
uprv_free(ucaRulesStr);
if(in){
const char* cp;
UChar c=0;
UCHARBUF* ucbuf;
int size = T_FileStream_size(in);
UChar* pTarget = (UChar*) uprv_malloc(sizeof(UChar)*size);
UChar* target = pTarget;
UChar* targetLimit = pTarget+size;
if(ucbuf_autodetect(in,&cp)){
}
ucbuf= ucbuf_open(in,cp,status);
do{
c = (UChar)ucbuf_getc(ucbuf,status);
unescape(ucbuf,status);
*(target++) = c;
}while(c!=U_EOF && (target<targetLimit));
/* Add it to bundle */
temp = string_open(bundle,cTag, pTarget, target-pTarget, status);
table_add(rootTable, temp, status);
put(data, &tag, status);
if(U_FAILURE(*status)) {
goto finish;
}
temp = NULL;
/* clean up */
uprv_free(pTarget);
}else{
fprintf(stderr, "Error! Couldn't open input file %s for tag %s\n", fileName, cTag );
goto finish;
}
}else{
@ -815,8 +805,8 @@ finish:
/*uprv_free(cTag);*/
/*uprv_free(cSubTag);*/
if(file != 0)
u_fclose(file);
/*if(buf != 0)
ucbuf_close(buf);
*/
return bundle;
}

View File

@ -21,8 +21,9 @@
#include "unicode/utypes.h"
#include "filestrm.h"
#include "rblist.h"
#include "ucbuf.h"
/* Parse a ResourceBundle text file */
struct SRBRoot* parse(FileStream *f, const char *cp, const char* inputDir, UErrorCode *status);
struct SRBRoot* parse(UCHARBUF* buf, const char *cp, const char* inputDir, UErrorCode *status);
#endif

View File

@ -12,12 +12,12 @@
*
* Date Name Description
* 05/26/99 stephen Creation.
* 5/10/01 Ram removed ustdio dependency
*******************************************************************************
*/
#include "read.h"
#include "error.h"
#include "unicode/ustdio.h"
#include "unicode/ustring.h"
#define OPENBRACE 0x007B
@ -46,13 +46,12 @@ static UBool didInit=FALSE;
extern int32_t lineCount;
/* Protos */
static enum ETokenType getStringToken(UFILE *f, UChar initialChar,
static enum ETokenType getStringToken(UCHARBUF* buf, UChar initialChar,
struct UString *token,
UErrorCode *status);
static UChar unescape(UFILE *f, UErrorCode *status);
static UChar getNextChar(UFILE *f, UBool skipwhite, UErrorCode *status);
static void seekUntilNewline(UFILE *f, UErrorCode *status);
static void seekUntilEndOfComment(UFILE *f, UErrorCode *status);
static UChar getNextChar(UCHARBUF* buf, UBool skipwhite, UErrorCode *status);
static void seekUntilNewline(UCHARBUF* buf, UErrorCode *status);
static void seekUntilEndOfComment(UCHARBUF* buf, UErrorCode *status);
static UBool isWhitespace(UChar c);
static UBool isNewline(UChar c);
@ -65,7 +64,7 @@ static UBool isNewline(UChar c);
never return eString twice in a row; instead, multiple adjacent
string tokens will be merged into one, with no intervening
space. */
enum ETokenType getNextToken(UFILE *f,
enum ETokenType getNextToken(UCHARBUF* buf,
struct UString *token,
UErrorCode *status)
{
@ -77,7 +76,7 @@ enum ETokenType getNextToken(UFILE *f,
return tok_error;
/* Skip whitespace */
c = getNextChar(f, TRUE, status);
c = getNextChar(buf, TRUE, status);
if(U_FAILURE(*status))
return tok_error;
@ -93,7 +92,7 @@ enum ETokenType getNextToken(UFILE *f,
tokenType = getStringToken(f, c, token, status);
break;
*/
default: return getStringToken(f, c, token, status);
default: return getStringToken(buf, c, token, status);
}
/*
if(!didInit) {
@ -137,7 +136,7 @@ enum ETokenType getNextToken(UFILE *f,
well. If two adjacent strings are quoted, they are merged without
intervening space. Otherwise a single SPACE character is
inserted. */
static enum ETokenType getStringToken(UFILE *f,
static enum ETokenType getStringToken(UCHARBUF* buf,
UChar initialChar,
struct UString *token,
UErrorCode *status)
@ -171,7 +170,7 @@ static enum ETokenType getStringToken(UFILE *f,
lastStringWasQuoted = TRUE;
for(;;) {
c = u_fgetc(f);
c = (UChar)ucbuf_getc(buf,status);
/* c = u_fgetc(f, status);*/
/* EOF reached */
@ -184,7 +183,7 @@ static enum ETokenType getStringToken(UFILE *f,
if(c == QUOTE)
break;
if(c == ESCAPE)
c = unescape(f, status);
c = unescape(buf, status);
ustr_ucat(token, c, status);
if(U_FAILURE(*status))
return tok_error;
@ -199,17 +198,17 @@ static enum ETokenType getStringToken(UFILE *f,
lastStringWasQuoted = FALSE;
if(c == ESCAPE)
c = unescape(f, status);
c = unescape(buf, status);
ustr_ucat(token, c, status);
if(U_FAILURE(*status))
return tok_error;
for(;;) {
/* DON'T skip whitespace */
c = getNextChar(f, FALSE, status);
c = getNextChar(buf, FALSE, status);
/* EOF reached */
if(c == (UChar)U_EOF) {
u_fungetc(c, f);
ucbuf_ungetc(c, buf);
return tok_string;
}
@ -222,7 +221,7 @@ static enum ETokenType getStringToken(UFILE *f,
|| c == COMMA
/*|| c == COLON*/)
{
u_fungetc(c, f);
ucbuf_ungetc(c, buf);
/*u_fungetc(c, f, status);*/
break;
}
@ -231,7 +230,7 @@ static enum ETokenType getStringToken(UFILE *f,
break;
if(c == ESCAPE)
c = unescape(f, status);
c = unescape(buf, status);
ustr_ucat(token, c, status);
if(U_FAILURE(*status))
return tok_error;
@ -239,12 +238,12 @@ static enum ETokenType getStringToken(UFILE *f,
}
/* DO skip whitespace */
c = getNextChar(f, TRUE, status);
c = getNextChar(buf, TRUE, status);
if(U_FAILURE(*status))
return tok_string;
if(c == OPENBRACE || c == CLOSEBRACE || c == COMMA/* || c == COLON*/) {
u_fungetc(c, f);
ucbuf_ungetc(c, buf);
/*u_fungetc(c, f, status);*/
return tok_string;
}
@ -253,7 +252,7 @@ static enum ETokenType getStringToken(UFILE *f,
/* Retrieve the next character, ignoring comments. If skipwhite is
true, whitespace is skipped as well. */
static UChar getNextChar(UFILE *f,
static UChar getNextChar(UCHARBUF* buf,
UBool skipwhite,
UErrorCode *status)
{
@ -263,7 +262,7 @@ static UChar getNextChar(UFILE *f,
return U_EOF;
for(;;) {
c = u_fgetc(f);
c =(UChar) ucbuf_getc(buf,status);
/*c = u_fgetc(f, status);*/
if(c == (UChar)U_EOF)
return U_EOF;
@ -275,23 +274,23 @@ static UChar getNextChar(UFILE *f,
if(c != SLASH)
return c;
c = u_fgetc(f);
c = (UChar)ucbuf_getc(buf,status);
/* c = u_fgetc(f, status);*/
if(c == (UChar)U_EOF)
return U_EOF;
switch(c) {
case SLASH:
seekUntilNewline(f, status);
seekUntilNewline(buf, status);
break;
case ASTERISK:
/* Note that we silently ignore an unterminated comment */
seekUntilEndOfComment(f, status);
seekUntilEndOfComment(buf, status);
break;
default:
u_fungetc(c, f);
ucbuf_ungetc(c, buf);
/*u_fungetc(c, f, status);*/
/* If get() failed this is a NOP */
return SLASH;
@ -299,7 +298,7 @@ static UChar getNextChar(UFILE *f,
}
}
static void seekUntilNewline(UFILE *f,
static void seekUntilNewline(UCHARBUF* buf,
UErrorCode *status)
{
UChar c;
@ -308,7 +307,7 @@ static void seekUntilNewline(UFILE *f,
return;
do {
c = u_fgetc(f);
c = (UChar)ucbuf_getc(buf,status);
/* c = u_fgetc(f, status);*/
} while(! isNewline(c) && c != (UChar)U_EOF && *status == U_ZERO_ERROR);
@ -316,7 +315,7 @@ static void seekUntilNewline(UFILE *f,
err = kItemNotFound;*/
}
static void seekUntilEndOfComment(UFILE *f,
static void seekUntilEndOfComment(UCHARBUF* buf,
UErrorCode *status)
{
UChar c, d;
@ -325,13 +324,13 @@ static void seekUntilEndOfComment(UFILE *f,
return;
do {
c = u_fgetc(f);
c =(UChar) ucbuf_getc(buf,status);
/* c = u_fgetc(f, status);*/
if(c == ASTERISK) {
d = u_fgetc(f);
d =(UChar) ucbuf_getc(buf,status);
/* d = u_fgetc(f, status);*/
if(d != SLASH)
u_fungetc(d, f);
ucbuf_ungetc(d, buf);
/*u_fungetc(d, f, status);*/
else
break;
@ -344,15 +343,15 @@ static void seekUntilEndOfComment(UFILE *f,
}
}
static UChar unescape(UFILE *f,
UChar unescape(UCHARBUF* buf,
UErrorCode *status)
{
if(U_FAILURE(*status))
return U_EOF;
/* We expect to be called after the ESCAPE has been seen, but
* u_fgetcx needs an ESCAPE to do its magic. */
u_fungetc(ESCAPE, f);
return (UChar) u_fgetcx(f);
ucbuf_ungetc(ESCAPE, buf);
return (UChar) ucbuf_getcx(buf,status);
}
static UBool isWhitespace(UChar c)

View File

@ -12,6 +12,7 @@
*
* Date Name Description
* 05/26/99 stephen Creation.
* 5/10/01 Ram removed ustdio dependency
*******************************************************************************
*/
@ -19,8 +20,8 @@
#define READ_H 1
#include "unicode/utypes.h"
#include "unicode/ustdio.h"
#include "ustr.h"
#include "ucbuf.h"
/* The types of tokens which may be returned by getNextToken. */
enum ETokenType
@ -43,7 +44,8 @@ enum ETokenType
tok_token_type_count = 12 /* Number of "real" token types */
};
enum ETokenType getNextToken(UFILE *f,
UChar unescape(UCHARBUF* buf, UErrorCode *status);
enum ETokenType getNextToken(UCHARBUF* buf,
struct UString *token,
UErrorCode *status);

View File

@ -0,0 +1,225 @@
/*
*******************************************************************************
*
* Copyright (C) 1998-2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* File ucbuf.c
*
* Modification History:
*
* Date Name Description
* 05/10/01 Ram Creation.
*******************************************************************************
*/
#include "unicode/utypes.h"
#include "unicode/ucnv.h"
#include "filestrm.h"
#include "cmemory.h"
#include "unicode/utrans.h"
#include "unicode/ustring.h"
#include "ucbuf.h"
#define MAX_BUF 1000
/*
 * Look for a Unicode byte order mark (BOM) at the current position of `in`.
 *
 * in - stream to inspect; callers invoke this right after opening the file.
 * cp - out: receives the converter name that matches the BOM. It is left
 *      untouched when no BOM is recognized.
 *
 * Returns TRUE when a BOM was recognized; the stream is then positioned on
 * the first byte that follows the BOM. Returns FALSE otherwise, with the
 * stream rewound.
 */
UBool ucbuf_autodetect(FileStream* in,const char** cp){
    /* Zeroed so that a short read can never be compared as garbage. */
    char start[3] = { '\0', '\0', '\0' };
    int bomLength = 0;
    int numRead = T_FileStream_read(in, start, 3);

    if(numRead >= 2 && start[0] == '\xFE' && start[1] == '\xFF') {
        *cp = "UTF16_BigEndian";
        bomLength = 2;
    } else if(numRead >= 2 && start[0] == '\xFF' && start[1] == '\xFE') {
        *cp = "UTF16_LittleEndian";
        bomLength = 2;
    } else if(numRead >= 3 && start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
        *cp = "UTF8";
        bomLength = 3;
    }

    /*
     * Three bytes were probed, but a UTF-16 BOM is only two bytes long.
     * Rewind and skip exactly the BOM so that no payload byte is swallowed
     * (which would misalign every following UTF-16 code unit).
     */
    T_FileStream_rewind(in);
    if(bomLength > 0) {
        T_FileStream_read(in, start, bomLength);
        return TRUE;
    }
    return FALSE;
}
/*
 * Refill buf->buffer with up to MAX_BUF UChars read from buf->in.
 *
 * Any UChars that have not been consumed yet (currentPos..bufLimit) are moved
 * to the front of the buffer and kept; the rest of the buffer is filled with
 * 0xFF bytes, i.e. with the UChar value 0xFFFF, before new data is stored.
 * The bytes read are converted with buf->conv when a converter is present,
 * otherwise they are widened with u_charsToUChars().
 *
 * buf - buffer to refill; its storage is allocated here on first use.
 * err - in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR when the
 *       storage cannot be allocated, or to the converter's error.
 *
 * Returns buf on success and NULL on failure. After a conversion failure the
 * buffer is left empty (currentPos == bufLimit).
 */
UCHARBUF* ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* err){
    /* Scratch space for the raw bytes; on the stack so it cannot leak. */
    char cbuf[MAX_BUF];
    UChar* pTarget=NULL;
    UChar* target=NULL;
    const char* source=NULL;
    int numRead=0;
    int offset=0;

    if(buf->buffer==NULL){
        /* first use: allocate the UChar storage */
        pTarget = (UChar*) uprv_malloc(sizeof(UChar)* MAX_BUF);
        if(pTarget==NULL){
            *err = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        memset(pTarget,0xff,sizeof(UChar)*MAX_BUF);
        /* Publish the storage right away so that it is owned by buf (and
         * released with it) even if this call fails further down. */
        buf->buffer = pTarget;
        buf->currentPos = pTarget;
        buf->bufLimit = pTarget;
    }else{
        pTarget = buf->buffer;
        /* check if we arrived here without exhausting the buffer */
        if(buf->currentPos<buf->bufLimit){
            offset = (int)(buf->bufLimit-buf->currentPos);
            memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar));
            memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_BUF-offset));
        }else{
            memset(pTarget,0xff,sizeof(UChar)*MAX_BUF);
        }
    }

    /* read the file */
    numRead=T_FileStream_read(buf->in,cbuf,MAX_BUF-offset);
    buf->remaining-=numRead;

    /* convert the bytes */
    if(buf->conv){
        /* since state is saved in the converter we add offset to source */
        target = pTarget+offset;
        source = cbuf;
        ucnv_toUnicode(buf->conv,&target,target+numRead,&source,source+numRead,NULL,FALSE,err);
        if(U_FAILURE(*err)){
            buf->currentPos = pTarget;
            buf->bufLimit = pTarget;
            return NULL;
        }
        /* number of valid UChars: carried-over ones plus converted ones */
        numRead = (int)(target-pTarget);
    }else{
        u_charsToUChars(cbuf,pTarget+offset,numRead);
        /* one UChar per byte read, plus the carried-over ones */
        numRead += offset;
    }

    buf->buffer = pTarget;
    buf->currentPos = pTarget;
    buf->bufLimit = pTarget+numRead;
    return buf;
}
/*
 * Return the next UChar from the buffer, refilling it from the stream when
 * it has been drained.
 *
 * buf - buffer to read from.
 * err - in/out ICU error code, passed on to ucbuf_fillucbuf().
 *
 * Returns the character, or U_EOF when the input is exhausted, when the
 * refill fails, or when the refill produced no characters.
 */
UChar32 ucbuf_getc(UCHARBUF* buf,UErrorCode* err){
    if(buf->currentPos >= buf->bufLimit){
        /* nothing buffered: either we are done or we need more input */
        if(buf->remaining==0){
            return U_EOF;
        }
        if(ucbuf_fillucbuf(buf,err)==NULL || U_FAILURE(*err)){
            return U_EOF;
        }
        /* The refill may legitimately yield nothing; report end of input
         * instead of reading (and stepping) past the valid data. */
        if(buf->currentPos >= buf->bufLimit){
            return U_EOF;
        }
    }
    return *(buf->currentPos++);
}
/*
 * Character accessor handed to u_unescapeAt(): `context` is the UCHARBUF
 * being read and `offset` is counted from its current read position.
 */
static UChar _charAt(int32_t offset, void *context) {
    UCHARBUF* ucb = (UCHARBUF*) context;
    return *(ucb->currentPos + offset);
}
/*
 * Return the next character from the buffer, decoding a backslash escape
 * sequence with u_unescapeAt() when the character is a backslash.
 *
 * buf - buffer to read from.
 * err - in/out ICU error code, passed on to ucbuf_fillucbuf().
 *
 * Returns the plain character, the value produced by u_unescapeAt() for an
 * escape sequence, or U_EOF when no character is available or a refill fails.
 */
UChar32
ucbuf_getcx(UCHARBUF* buf,UErrorCode* err) {
    int32_t length;
    int32_t offset;
    UChar32 c32;
    UChar c16;

    /* Fill the buffer if it is empty */
    if (buf->currentPos >= buf->bufLimit) {
        ucbuf_fillucbuf(buf,err);
        if (U_FAILURE(*err)) {
            return U_EOF;
        }
    }

    /* Get the next character in the buffer */
    if (buf->currentPos < buf->bufLimit) {
        c16 = *(buf->currentPos)++;
    } else {
        c16 = U_EOF;
    }

    /* If it isn't a backslash, return it */
    if (c16 != 0x005C /*'\\'*/) {
        return c16;
    }

    /* Determine the amount of data in the buffer */
    length = (int32_t)(buf->bufLimit-buf->currentPos);

    /* The longest escape sequence is \Uhhhhhhhh; make sure
       we have at least that many characters */
    if (length < 10) {
        /* The refill keeps the unread characters and moves them to the
         * front of the buffer, so currentPos still addresses the escape. */
        ucbuf_fillucbuf(buf,err);
        if (U_FAILURE(*err)) {
            /* do not decode from a buffer whose refill failed */
            return U_EOF;
        }
        length = (int32_t)(buf->bufLimit-buf->currentPos);
    }

    /* Process the escape */
    offset = 0;
    c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf);

    /* Update the current buffer position */
    buf->currentPos += offset;

    return c32;
}
/*
 * Create a UCHARBUF that reads from `in` and perform the first fill.
 *
 * in  - open stream to read from. The stream is only borrowed.
 * cp  - name of the converter used to decode the bytes. With an empty string
 *       (or NULL) no converter is opened and bytes are widened with
 *       u_charsToUChars() instead.
 * err - in/out ICU error code. Nothing is done when it already signals a
 *       failure on entry; it is set to U_MEMORY_ALLOCATION_ERROR when the
 *       structure cannot be allocated.
 *
 * Returns the new buffer, to be released with ucbuf_close(), or NULL on
 * failure. Everything allocated here is released again before NULL is
 * returned.
 */
UCHARBUF* ucbuf_open(FileStream* in, const char* cp,UErrorCode* err){
    UCHARBUF* buf;

    /* check before allocating, so that there is nothing to leak */
    if(U_FAILURE(*err)){
        return NULL;
    }

    buf = (UCHARBUF*) uprv_malloc(sizeof(UCHARBUF));
    if(buf==NULL){
        *err = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    buf->in=in;
    buf->fileLen = T_FileStream_size(in);
    buf->remaining=buf->fileLen;
    buf->buffer=NULL;
    buf->currentPos=NULL;
    buf->bufLimit=NULL;
    buf->conv=NULL;

    if(cp!=NULL && *cp!='\0'){
        buf->conv=ucnv_open(cp,err);
        if(U_FAILURE(*err)){
            if(buf->conv){
                ucnv_close(buf->conv);
            }
            uprv_free(buf);
            return NULL;
        }
    }

    if(ucbuf_fillucbuf(buf,err)==NULL){
        /* first fill failed: undo everything that was set up above */
        if(buf->conv){
            ucnv_close(buf->conv);
        }
        if(buf->buffer!=NULL){
            uprv_free(buf->buffer);
        }
        uprv_free(buf);
        return NULL;
    }
    return buf;
}
/*
 * Release the UChar storage of `buf` while keeping the UCHARBUF itself.
 * The position pointers are reset together with the storage, so no pointer
 * into the freed block survives and a repeated call is harmless.
 */
void ucbuf_closebuf(UCHARBUF* buf){
    if(buf==NULL || buf->buffer==NULL){
        return;
    }
    uprv_free(buf->buffer);
    buf->buffer=NULL;
    buf->currentPos=NULL;
    buf->bufLimit=NULL;
}
/*
 * Step the read position back by one UChar so that the previously returned
 * character is delivered again. Nothing happens when the read position is
 * already at the start of the buffer.
 *
 * c   - the character being pushed back; it is neither stored nor compared
 *       with the buffer contents.
 * buf - buffer whose read position is moved.
 */
void ucbuf_ungetc(UChar32 c,UCHARBUF* buf){
    if(buf->currentPos==buf->buffer){
        return;
    }
    --buf->currentPos;
}
/*
 * Destroy a UCHARBUF created by ucbuf_open(): closes its converter (if any),
 * releases the UChar storage and frees the structure itself.
 *
 * The underlying stream is NOT closed here, only the reference to it is
 * dropped; closing it stays the caller's job.
 *
 * buf - buffer to destroy; NULL is accepted and ignored, so the result of a
 *       failed ucbuf_open() can be passed in safely.
 */
void ucbuf_close(UCHARBUF* buf){
    if(buf==NULL){
        return;
    }
    if(buf->conv){
        ucnv_close(buf->conv);
        buf->conv=NULL;
    }
    buf->in=NULL;
    buf->currentPos=NULL;
    buf->bufLimit=NULL;
    ucbuf_closebuf(buf);
    uprv_free(buf);
}

View File

@ -0,0 +1,49 @@
/*
*******************************************************************************
*
* Copyright (C) 1998-2001, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
*
* File ucbuf.h
*
* Modification History:
*
* Date Name Description
* 05/10/01 Ram Creation.
*******************************************************************************
*/
#include "unicode/utypes.h"
#include "unicode/ucnv.h"
#include "filestrm.h"
#include "cmemory.h"
#include <stdio.h>
#ifndef UCBUF_H
#define UCBUF_H 1
/*
 * Buffered reader that hands out the contents of a FileStream one UChar at
 * a time.
 */
struct UCHARBUF {
/* Start of the UChar storage; allocated by the first fill in ucbuf.c and
 * released by ucbuf_closebuf(). */
UChar* buffer;
/* Next UChar to be handed out. */
UChar* currentPos;
/* One past the last valid UChar in the storage. */
UChar* bufLimit;
/* Size of the stream as reported by T_FileStream_size() in ucbuf_open(). */
int32_t fileLen;
/* Starts out as fileLen and is reduced by the number of bytes read on every
 * fill; ucbuf_getc() reports U_EOF once the storage is drained and this
 * counter is zero. */
int32_t remaining;
/* Stream the bytes are read from. ucbuf_close() only clears this pointer;
 * it does not close the stream. */
FileStream* in;
/* Converter opened from the `cp` argument of ucbuf_open(); NULL when `cp`
 * is the empty string, in which case bytes are widened with
 * u_charsToUChars(). */
UConverter* conv;
};
typedef struct UCHARBUF UCHARBUF;
/* Value returned by ucbuf_getc()/ucbuf_getcx() when no character is
 * available: at the end of the input or after a failed refill. */
#define U_EOF 0xFFFF
/* Return the next UChar, refilling the buffer from the stream as needed;
 * U_EOF at the end of the input or when the refill fails. */
UChar32 ucbuf_getc(UCHARBUF* buf,UErrorCode* err);
/* Like ucbuf_getc(), but when the next character is a backslash the escape
 * sequence that follows is decoded with u_unescapeAt() and the decoded value
 * is returned. */
UChar32 ucbuf_getcx(UCHARBUF* buf,UErrorCode* err);
/* NOTE(review): declared here, but ucbuf.c as added by this change appears
 * to contain no definition of this function - confirm before calling. */
void ucbuf_rewind(UCHARBUF* buf);
/* Allocate a UCHARBUF over `in`, open the converter named by `cp` (none when
 * `cp` is empty) and perform the first fill. Returns NULL on failure. */
UCHARBUF* ucbuf_open(FileStream* in,const char* cp,UErrorCode* err);
/* Close the converter, release the UChar storage and free `buf`. The
 * underlying stream is left open. */
void ucbuf_close(UCHARBUF* buf);
/* Step the read position back by one UChar unless it is already at the start
 * of the storage; `ungetChar` itself is not stored. */
void ucbuf_ungetc(UChar32 ungetChar,UCHARBUF* buf);
/* Release only the UChar storage of `buf`; the structure itself is kept. */
void ucbuf_closebuf(UCHARBUF* buf);
/* Check the start of `in` for a UTF-16 (big or little endian) or UTF-8 byte
 * order mark. On a match the converter name is stored in *cp and TRUE is
 * returned; otherwise the stream is rewound and FALSE is returned. */
UBool ucbuf_autodetect(FileStream* in,const char** cp);
#endif