scuffed-code/icu4c/source/tools/genrb/derb.cpp
2017-02-03 18:57:23 +00:00

658 lines
22 KiB
C++

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 1999-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: derb.cpp
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2000sep6
* created by: Vladimir Weinstein as an ICU workshop example
* maintained by: Yves Arrouye <yves@realnames.com>
*/
#include "unicode/stringpiece.h"
#include "unicode/ucnv.h"
#include "unicode/unistr.h"
#include "unicode/ustring.h"
#include "unicode/putil.h"
#include "unicode/ustdio.h"
#include "charstr.h"
#include "uresimp.h"
#include "cmemory.h"
#include "cstring.h"
#include "uoptions.h"
#include "toolutil.h"
#include "ustrfmt.h"
#if !UCONFIG_NO_FORMATTING
#define DERB_VERSION "1.1"
#define DERB_DEFAULT_TRUNC 80
static const int32_t indentsize = 4;
static int32_t truncsize = DERB_DEFAULT_TRUNC;
static UBool opt_truncate = FALSE;
static const char *getEncodingName(const char *encoding);
static void reportError(const char *pname, UErrorCode *status, const char *when);
static UChar *quotedString(const UChar *string);
static void printOutBundle(UFILE *out, UResourceBundle *resource, int32_t indent, const char *pname, UErrorCode *status);
static void printString(UFILE *out, const UChar *str, int32_t len);
static void printCString(UFILE *out, const char *str, int32_t len);
static void printIndent(UFILE *out, int32_t indent);
static void printHex(UFILE *out, uint8_t what);
static UOption options[]={
UOPTION_HELP_H,
UOPTION_HELP_QUESTION_MARK,
/* 2 */ UOPTION_ENCODING,
/* 3 */ { "to-stdout", NULL, NULL, NULL, 'c', UOPT_NO_ARG, 0 } ,
/* 4 */ { "truncate", NULL, NULL, NULL, 't', UOPT_OPTIONAL_ARG, 0 },
/* 5 */ UOPTION_VERBOSE,
/* 6 */ UOPTION_DESTDIR,
/* 7 */ UOPTION_SOURCEDIR,
/* 8 */ { "bom", NULL, NULL, NULL, 0, UOPT_NO_ARG, 0 },
/* 9 */ UOPTION_ICUDATADIR,
/* 10 */ UOPTION_VERSION,
/* 11 */ { "suppressAliases", NULL, NULL, NULL, 'A', UOPT_NO_ARG, 0 },
};
static UBool verbose = FALSE;
static UBool suppressAliases = FALSE;
static UFILE *ustderr = NULL;
extern int
main(int argc, char* argv[]) {
const char *encoding = NULL;
const char *outputDir = NULL; /* NULL = no output directory, use current */
const char *inputDir = ".";
int tostdout = 0;
int prbom = 0;
const char *pname;
UResourceBundle *bundle = NULL;
int32_t i = 0;
const char* arg;
/* Get the name of tool. */
pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
if (!pname) {
pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR);
}
#endif
if (!pname) {
pname = *argv;
} else {
++pname;
}
/* error handling, printing usage message */
argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
/* error handling, printing usage message */
if(argc<0) {
fprintf(stderr,
"%s: error in command line argument \"%s\"\n", pname,
argv[-argc]);
}
if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
fprintf(argc < 0 ? stderr : stdout,
"%csage: %s [ -h, -?, --help ] [ -V, --version ]\n"
" [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n"
" [ -t, --truncate [ size ] ]\n"
" [ -s, --sourcedir source ] [ -d, --destdir destination ]\n"
" [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n"
" [ -A, --suppressAliases]\n"
" bundle ...\n", argc < 0 ? 'u' : 'U',
pname);
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
}
if(options[10].doesOccur) {
fprintf(stderr,
"%s version %s (ICU version %s).\n"
"%s\n",
pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
return U_ZERO_ERROR;
}
if(options[2].doesOccur) {
encoding = options[2].value;
}
if (options[3].doesOccur) {
if(options[2].doesOccur) {
fprintf(stderr, "%s: Error: don't specify an encoding (-e) when writing to stdout (-c).\n", pname);
return 3;
}
tostdout = 1;
}
if(options[4].doesOccur) {
opt_truncate = TRUE;
if(options[4].value != NULL) {
truncsize = atoi(options[4].value); /* user defined printable size */
} else {
truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */
}
} else {
opt_truncate = FALSE;
}
if(options[5].doesOccur) {
verbose = TRUE;
}
if (options[6].doesOccur) {
outputDir = options[6].value;
}
if(options[7].doesOccur) {
inputDir = options[7].value; /* we'll use users resources */
}
if (options[8].doesOccur) {
prbom = 1;
}
if (options[9].doesOccur) {
u_setDataDirectory(options[9].value);
}
if (options[11].doesOccur) {
suppressAliases = TRUE;
}
fflush(stderr); // use ustderr now.
ustderr = u_finit(stderr, NULL, NULL);
for (i = 1; i < argc; ++i) {
static const UChar sp[] = { 0x0020 }; /* " " */
arg = getLongPathname(argv[i]);
if (verbose) {
u_fprintf(ustderr, "processing bundle \"%s\"\n", argv[i]);
}
icu::CharString locale;
UErrorCode status = U_ZERO_ERROR;
{
const char *p = findBasename(arg);
const char *q = uprv_strrchr(p, '.');
if (q == NULL) {
locale.append(p, status);
} else {
locale.append(p, (int32_t)(q - p), status);
}
}
if (U_FAILURE(status)) {
return status;
}
icu::CharString infile;
const char *thename = NULL;
UBool fromICUData = !uprv_strcmp(inputDir, "-");
if (!fromICUData) {
UBool absfilename = *arg == U_FILE_SEP_CHAR;
#if U_PLATFORM_HAS_WIN32_API
if (!absfilename) {
absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0])
&& arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR);
}
#endif
if (absfilename) {
thename = arg;
} else {
const char *q = uprv_strrchr(arg, U_FILE_SEP_CHAR);
#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
if (q == NULL) {
q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
}
#endif
infile.append(inputDir, status);
if(q != NULL) {
infile.appendPathPart(icu::StringPiece(arg, (int32_t)(q - arg)), status);
}
if (U_FAILURE(status)) {
return status;
}
thename = infile.data();
}
}
if (thename) {
bundle = ures_openDirect(thename, locale.data(), &status);
} else {
bundle = ures_open(fromICUData ? 0 : inputDir, locale.data(), &status);
}
if (U_SUCCESS(status)) {
UFILE *out = NULL;
const char *filename = 0;
const char *ext = 0;
if (locale.isEmpty() || !tostdout) {
filename = findBasename(arg);
ext = uprv_strrchr(filename, '.');
if (!ext) {
ext = uprv_strchr(filename, 0);
}
}
if (tostdout) {
out = u_get_stdout();
} else {
icu::CharString thefile;
if (outputDir) {
thefile.append(outputDir, status);
}
thefile.appendPathPart(filename, status);
if (*ext) {
thefile.truncate(thefile.length() - (int32_t)uprv_strlen(ext));
}
thefile.append(".txt", status);
if (U_FAILURE(status)) {
return status;
}
out = u_fopen(thefile.data(), "w", NULL, encoding);
if (!out) {
u_fprintf(ustderr, "%s: couldn't create %s\n", pname, thefile.data());
u_fclose(ustderr);
return 4;
}
}
// now, set the callback.
ucnv_setFromUCallBack(u_fgetConverter(out), UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 0, 0, &status);
if (U_FAILURE(status)) {
u_fprintf(ustderr, "%s: couldn't configure converter for encoding\n", pname);
u_fclose(ustderr);
if(!tostdout) {
u_fclose(out);
}
return 3;
}
if (prbom) { /* XXX: Should be done only for UTFs */
u_fputc(0xFEFF, out);
}
u_fprintf(out, "// -*- Coding: %s; -*-\n//\n", encoding ? encoding : getEncodingName(ucnv_getDefaultName()));
u_fprintf(out, "// This file was dumped by derb(8) from ");
if (thename) {
u_fprintf(out, "%s", thename);
} else if (fromICUData) {
u_fprintf(out, "the ICU internal %s locale", locale.data());
}
u_fprintf(out, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n");
if (!locale.isEmpty()) {
u_fprintf(out, "%s", locale.data());
} else {
u_fprintf(out, "%.*s%.*S", (int32_t)(ext - filename), filename, UPRV_LENGTHOF(sp), sp);
}
printOutBundle(out, bundle, 0, pname, &status);
if (!tostdout) {
u_fclose(out);
}
}
else {
reportError(pname, &status, "opening resource file");
}
ures_close(bundle);
}
return 0;
}
static UChar *quotedString(const UChar *string) {
int len = u_strlen(string);
int alen = len;
const UChar *sp;
UChar *newstr, *np;
for (sp = string; *sp; ++sp) {
switch (*sp) {
case '\n':
case 0x0022:
++alen;
break;
}
}
newstr = (UChar *) uprv_malloc((1 + alen) * U_SIZEOF_UCHAR);
for (sp = string, np = newstr; *sp; ++sp) {
switch (*sp) {
case '\n':
*np++ = 0x005C;
*np++ = 0x006E;
break;
case 0x0022:
*np++ = 0x005C;
U_FALLTHROUGH;
default:
*np++ = *sp;
break;
}
}
*np = 0;
return newstr;
}
static void printString(UFILE *out, const UChar *str, int32_t len) {
u_file_write(str, len, out);
}
static void printCString(UFILE *out, const char *str, int32_t len) {
if(len==-1) {
u_fprintf(out, "%s", str);
} else {
u_fprintf(out, "%.*s", len, str);
}
}
static void printIndent(UFILE *out, int32_t indent) {
icu::UnicodeString inchar(indent, 0x20, indent);
printString(out, inchar.getBuffer(), indent);
}
static void printHex(UFILE *out, uint8_t what) {
static const char map[] = "0123456789ABCDEF";
UChar hex[2];
hex[0] = map[what >> 4];
hex[1] = map[what & 0xf];
printString(out, hex, 2);
}
static void printOutAlias(UFILE *out, UResourceBundle *parent, Resource r, const char *key, int32_t indent, const char *pname, UErrorCode *status) {
static const UChar cr[] = { 0xA }; // LF
int32_t len = 0;
const UChar* thestr = res_getAlias(&(parent->fResData), r, &len);
UChar *string = quotedString(thestr);
if(opt_truncate && len > truncsize) {
char msg[128];
printIndent(out, indent);
sprintf(msg, "// WARNING: this resource, size %li is truncated to %li\n",
(long)len, (long)truncsize/2);
printCString(out, msg, -1);
len = truncsize;
}
if(U_SUCCESS(*status)) {
static const UChar openStr[] = { 0x003A, 0x0061, 0x006C, 0x0069, 0x0061, 0x0073, 0x0020, 0x007B, 0x0020, 0x0022 }; /* ":alias { \"" */
static const UChar closeStr[] = { 0x0022, 0x0020, 0x007D, 0x0020 }; /* "\" } " */
printIndent(out, indent);
if(key != NULL) {
printCString(out, key, -1);
}
printString(out, openStr, UPRV_LENGTHOF(openStr));
printString(out, string, len);
printString(out, closeStr, UPRV_LENGTHOF(closeStr));
if(verbose) {
printCString(out, " // ALIAS", -1);
}
printString(out, cr, UPRV_LENGTHOF(cr));
} else {
reportError(pname, status, "getting binary value");
}
uprv_free(string);
}
static void printOutBundle(UFILE *out, UResourceBundle *resource, int32_t indent, const char *pname, UErrorCode *status)
{
static const UChar cr[] = { 0xA }; // LF
/* int32_t noOfElements = ures_getSize(resource);*/
int32_t i = 0;
const char *key = ures_getKey(resource);
switch(ures_getType(resource)) {
case URES_STRING :
{
int32_t len=0;
const UChar* thestr = ures_getString(resource, &len, status);
UChar *string = quotedString(thestr);
/* TODO: String truncation */
if(opt_truncate && len > truncsize) {
char msg[128];
printIndent(out, indent);
sprintf(msg, "// WARNING: this resource, size %li is truncated to %li\n",
(long)len, (long)(truncsize/2));
printCString(out, msg, -1);
len = truncsize/2;
}
printIndent(out, indent);
if(key != NULL) {
static const UChar openStr[] = { 0x0020, 0x007B, 0x0020, 0x0022 }; /* " { \"" */
static const UChar closeStr[] = { 0x0022, 0x0020, 0x007D }; /* "\" }" */
printCString(out, key, (int32_t)uprv_strlen(key));
printString(out, openStr, UPRV_LENGTHOF(openStr));
printString(out, string, len);
printString(out, closeStr, UPRV_LENGTHOF(closeStr));
} else {
static const UChar openStr[] = { 0x0022 }; /* "\"" */
static const UChar closeStr[] = { 0x0022, 0x002C }; /* "\"," */
printString(out, openStr, UPRV_LENGTHOF(openStr));
printString(out, string, (int32_t)(u_strlen(string)));
printString(out, closeStr, UPRV_LENGTHOF(closeStr));
}
if(verbose) {
printCString(out, "// STRING", -1);
}
printString(out, cr, UPRV_LENGTHOF(cr));
uprv_free(string);
}
break;
case URES_INT :
{
static const UChar openStr[] = { 0x003A, 0x0069, 0x006E, 0x0074, 0x0020, 0x007B, 0x0020 }; /* ":int { " */
static const UChar closeStr[] = { 0x0020, 0x007D }; /* " }" */
UChar num[20];
printIndent(out, indent);
if(key != NULL) {
printCString(out, key, -1);
}
printString(out, openStr, UPRV_LENGTHOF(openStr));
uprv_itou(num, 20, ures_getInt(resource, status), 10, 0);
printString(out, num, u_strlen(num));
printString(out, closeStr, UPRV_LENGTHOF(closeStr));
if(verbose) {
printCString(out, "// INT", -1);
}
printString(out, cr, UPRV_LENGTHOF(cr));
break;
}
case URES_BINARY :
{
int32_t len = 0;
const int8_t *data = (const int8_t *)ures_getBinary(resource, &len, status);
if(opt_truncate && len > truncsize) {
char msg[128];
printIndent(out, indent);
sprintf(msg, "// WARNING: this resource, size %li is truncated to %li\n",
(long)len, (long)(truncsize/2));
printCString(out, msg, -1);
len = truncsize;
}
if(U_SUCCESS(*status)) {
static const UChar openStr[] = { 0x003A, 0x0062, 0x0069, 0x006E, 0x0061, 0x0072, 0x0079, 0x0020, 0x007B, 0x0020 }; /* ":binary { " */
static const UChar closeStr[] = { 0x0020, 0x007D, 0x0020 }; /* " } " */
printIndent(out, indent);
if(key != NULL) {
printCString(out, key, -1);
}
printString(out, openStr, UPRV_LENGTHOF(openStr));
for(i = 0; i<len; i++) {
printHex(out, *data++);
}
printString(out, closeStr, UPRV_LENGTHOF(closeStr));
if(verbose) {
printCString(out, " // BINARY", -1);
}
printString(out, cr, UPRV_LENGTHOF(cr));
} else {
reportError(pname, status, "getting binary value");
}
}
break;
case URES_INT_VECTOR :
{
int32_t len = 0;
const int32_t *data = ures_getIntVector(resource, &len, status);
if(U_SUCCESS(*status)) {
static const UChar openStr[] = { 0x003A, 0x0069, 0x006E, 0x0074, 0x0076, 0x0065, 0x0063, 0x0074, 0x006F, 0x0072, 0x0020, 0x007B, 0x0020 }; /* ":intvector { " */
static const UChar closeStr[] = { 0x0020, 0x007D, 0x0020 }; /* " } " */
UChar num[20];
printIndent(out, indent);
if(key != NULL) {
printCString(out, key, -1);
}
printString(out, openStr, UPRV_LENGTHOF(openStr));
for(i = 0; i < len - 1; i++) {
int32_t numLen = uprv_itou(num, 20, data[i], 10, 0);
num[numLen++] = 0x002C; /* ',' */
num[numLen++] = 0x0020; /* ' ' */
num[numLen] = 0;
printString(out, num, u_strlen(num));
}
if(len > 0) {
uprv_itou(num, 20, data[len - 1], 10, 0);
printString(out, num, u_strlen(num));
}
printString(out, closeStr, UPRV_LENGTHOF(closeStr));
if(verbose) {
printCString(out, "// INTVECTOR", -1);
}
printString(out, cr, UPRV_LENGTHOF(cr));
} else {
reportError(pname, status, "getting int vector");
}
}
break;
case URES_TABLE :
case URES_ARRAY :
{
static const UChar openStr[] = { 0x007B }; /* "{" */
static const UChar closeStr[] = { 0x007D, '\n' }; /* "}\n" */
UResourceBundle *t = NULL;
ures_resetIterator(resource);
printIndent(out, indent);
if(key != NULL) {
printCString(out, key, -1);
}
printString(out, openStr, UPRV_LENGTHOF(openStr));
if(verbose) {
if(ures_getType(resource) == URES_TABLE) {
printCString(out, "// TABLE", -1);
} else {
printCString(out, "// ARRAY", -1);
}
}
printString(out, cr, UPRV_LENGTHOF(cr));
if(suppressAliases == FALSE) {
while(U_SUCCESS(*status) && ures_hasNext(resource)) {
t = ures_getNextResource(resource, t, status);
if(U_SUCCESS(*status)) {
printOutBundle(out, t, indent+indentsize, pname, status);
} else {
reportError(pname, status, "While processing table");
*status = U_ZERO_ERROR;
}
}
} else { /* we have to use low level access to do this */
Resource r;
int32_t resSize = ures_getSize(resource);
UBool isTable = (UBool)(ures_getType(resource) == URES_TABLE);
for(i = 0; i < resSize; i++) {
/* need to know if it's an alias */
if(isTable) {
r = res_getTableItemByIndex(&resource->fResData, resource->fRes, i, &key);
} else {
r = res_getArrayItem(&resource->fResData, resource->fRes, i);
}
if(U_SUCCESS(*status)) {
if(res_getPublicType(r) == URES_ALIAS) {
printOutAlias(out, resource, r, key, indent+indentsize, pname, status);
} else {
t = ures_getByIndex(resource, i, t, status);
printOutBundle(out, t, indent+indentsize, pname, status);
}
} else {
reportError(pname, status, "While processing table");
*status = U_ZERO_ERROR;
}
}
}
printIndent(out, indent);
printString(out, closeStr, UPRV_LENGTHOF(closeStr));
ures_close(t);
}
break;
default:
break;
}
}
static const char *getEncodingName(const char *encoding) {
UErrorCode err;
const char *enc;
err = U_ZERO_ERROR;
if (!(enc = ucnv_getStandardName(encoding, "MIME", &err))) {
err = U_ZERO_ERROR;
if (!(enc = ucnv_getStandardName(encoding, "IANA", &err))) {
;
}
}
return enc;
}
static void reportError(const char *pname, UErrorCode *status, const char *when) {
u_fprintf(ustderr, "%s: error %d while %s: %s\n", pname, *status, when, u_errorName(*status));
}
#else
extern int
main(int argc, char* argv[]) {
/* Changing stdio.h ustdio.h requires that formatting not be disabled. */
return 3;
}
#endif /* !UCONFIG_NO_FORMATTING */
/*
* Local Variables:
* indent-tabs-mode: nil
* End:
*/