ICU-1220 Added escape-codepoint as a callback to let people use the
new UCNV_ESCAPE_CODEPOINT. Options compatibility with iconv(1): added -V, --version; -s, --silent; --verbose (and -v because of ICU commands); -o, --output file. uconv(1) is now mostly compatible with iconv(1) usage, except for the fact that one cannot convert many files in one run yet. X-SVN-Rev: 7414
This commit is contained in:
parent
85a889c4c2
commit
3b470abbe3
@ -1,6 +1,6 @@
|
||||
// -*- Coding: utf-8; -*- [all uconv resource files]
|
||||
// Copyright (c) 2000 IBM, Inc. and Others.
|
||||
// $Revision: 1.16 $
|
||||
// $Revision: 1.17 $
|
||||
//
|
||||
// Root translation file for uconv messages.
|
||||
// So you want to translate this file??? Great!
|
||||
@ -30,9 +30,12 @@ root
|
||||
|
||||
lcUsageWord { "usage" }
|
||||
ucUsageWord { "Usage" }
|
||||
usage { "{0}: {1} [ -h, -?, --help ] [ -l, --list | --list-code code | --default-code | -L, --list-transliterators ] [ --canon ] [ -x transliterator ] [ --to-callback callback | -c ] [ --from-callback callback | -i ] -f, --from-code code -t, --to-code code [ file ]\n" }
|
||||
usage { "{0}: {1} [ -h, -?, --help ] [ -V, --version ] [ -s, --silent ] [ -v, --verbose ] [ -l, --list | --list-code code | --default-code | -L, --list-transliterators ] [ --canon ] [ -x transliterator ] [ --to-callback callback | -c ] [ --from-callback callback | -i ] [ --callback callback ] -f, --from-code code -t, --to-code code [ file ] [ -o, --output file ]\n" }
|
||||
|
||||
help { "Options: -h, --help print this message\n"
|
||||
" -V, --version print the program version\n"
|
||||
" -s, --silent suppress messages\n"
|
||||
" -v, --verbose display progress information\n"
|
||||
" -l, --list list all available encodings\n"
|
||||
" --list-code code list only the given encoding\n"
|
||||
" --default-code list only the default encoding\n"
|
||||
@ -43,8 +46,10 @@ root
|
||||
" -c omit invalid characters from the output\n"
|
||||
" --from-callback callback use callback on original encoding\n"
|
||||
" -i ignore invalid sequences in the input\n"
|
||||
" --callback callback use callback on both encodings\n"
|
||||
" -f, --from-code code set the original encoding\n"
|
||||
" -t, --to-code code set the destination encoding\n"
|
||||
" -o, --output file write output to file\n"
|
||||
"\n"
|
||||
"Callbacks:"
|
||||
}
|
||||
@ -66,6 +71,7 @@ root
|
||||
unknownCallback { "Unknown callback: {0}\n" } // 0: callback name
|
||||
|
||||
cantOpenInputF { "Couldn''t open input file {0}: {1}.\n" } // 0: file, 1: strerror [OS error string]
|
||||
cantCreateOutputF { "Couldn''t create output file {0}: {1}.\n" } // 0: file, 1: strerror [OS error string]
|
||||
|
||||
cantWrite { "The converted text couldn't be written: {0}.\n" } // 0: OS error string
|
||||
cantRead { "Error reading from input file {0}.\n" } // 0: OS error string
|
||||
|
@ -16,6 +16,15 @@
|
||||
.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
|
||||
]
|
||||
[
|
||||
.BI "\-V\fP, \fB\-\-version"
|
||||
]
|
||||
[
|
||||
.BI "\-s\fP, \fB\-\-silent"
|
||||
]
|
||||
[
|
||||
.BI "\-v\fP, \fB\-\-verbose"
|
||||
]
|
||||
[
|
||||
.BI "\-l\fP, \fB\-\-list"
|
||||
|
|
||||
.BI "\-l\fP, \fB\-\-list\-code" " code"
|
||||
@ -40,14 +49,24 @@
|
||||
|
|
||||
.B "\-i"
|
||||
]
|
||||
[
|
||||
.BI "\-\-callback" " callback"
|
||||
]
|
||||
.BI "\-f\fP, \fB\-\-from\-code" " encoding"
|
||||
.BI "\-t\fP, \fB\-\-to\-code" " encoding"
|
||||
[
|
||||
.I file
|
||||
.IR file
|
||||
]
|
||||
[
|
||||
.BI "\-o\fP, \fB\-\-output" " file"
|
||||
]
|
||||
.SH DESCRIPTION
|
||||
.B uconv
|
||||
converts its input from one given
|
||||
converts each given
|
||||
.I file
|
||||
(or its standard input if no
|
||||
.I file
|
||||
is specified) from one
|
||||
.I encoding
|
||||
to another. The transcoding is done using Unicode as a pivot encoding
|
||||
(i.e. the data are first transcoded from their original encoding to
|
||||
@ -66,6 +85,12 @@ after the data have been transcoded to Unicode.
|
||||
.BR \-h\fP, \fB\-?\fP, \fB\-\-help
|
||||
Print help about usage and exit.
|
||||
.TP
|
||||
.BI "\-s\fP, \fB\-\-silent"
|
||||
Suppress messages during execution.
|
||||
.TP
|
||||
.BI "\-v\fP, \fB\-\-verbose"
|
||||
Display extra informative messages during execution.
|
||||
.TP
|
||||
.BI "\-l\fP, \fB\-\-list"
|
||||
List all the available encodings and exit.
|
||||
.TP
|
||||
@ -125,6 +150,15 @@ Ignore invalid sequences in the input.
|
||||
Same as
|
||||
.BR "\-\-from\-callback skip" .
|
||||
.TP
|
||||
.BI "\-\-callback" " callback"
|
||||
Use
|
||||
.I callback
|
||||
to handle both characters that cannot be transcoded from the original
|
||||
encoding and characters that cannot be transcoded to the destination
|
||||
encoding. See section
|
||||
.B CALLBACKS
|
||||
for details on valid callbacks.
|
||||
.TP
|
||||
.BI "\-f\fP, \fB\-\-from\-code" " encoding"
|
||||
Set the original encoding of the data to
|
||||
.IR encoding .
|
||||
@ -132,6 +166,10 @@ Set the original encoding of the data to
|
||||
.BI "\-t\fP, \fB\-\-to\-code" " encoding"
|
||||
Transcode the data to
|
||||
.IR encoding .
|
||||
.TP
|
||||
.BI "\-o\fP, \fB\-\-output" " file"
|
||||
Write the transcoded data to
|
||||
.IR file .
|
||||
.SH CALLBACKS
|
||||
.B uconv
|
||||
supports specifying callbacks to handle invalid data. Callbacks can be
|
||||
@ -150,19 +188,19 @@ callbacks actually supported by
|
||||
is displayed when it is called with
|
||||
.BR "\-h\fP, \fB\-\-help" .
|
||||
.PP
|
||||
.TP \w'\fBescape-xml-hex'u+3n
|
||||
.TP \w'\fBescape-codepoint'u+3n
|
||||
.B substitute
|
||||
Write the encoding's substitute sequence, or the Unicode
|
||||
replacement character
|
||||
.B U+FFFD
|
||||
when transcoding to Unicode.
|
||||
This is the default callback.
|
||||
.TP
|
||||
.B skip
|
||||
Ignore the invalid data.
|
||||
.TP
|
||||
.B stop
|
||||
Stop with an error when encountering invalid data.
|
||||
This is the default callback.
|
||||
.TP
|
||||
.B escape
|
||||
Same as
|
||||
@ -210,6 +248,14 @@ Replace the missing characters with a string of the format
|
||||
where
|
||||
.I hhhh
|
||||
is the hexadecimal value of the character.
|
||||
.TP
|
||||
.B escape-codepoint
|
||||
Replace the missing characters with a string of the format
|
||||
.BR U+\fIhhhh\fP ,
|
||||
where
|
||||
.I hhhh
|
||||
is the hexadecimal value of the character. This is the format
|
||||
universally used to denote a Unicode codepoint in the literature.
|
||||
.SH VERSION
|
||||
@VERSION@
|
||||
.SH COPYRIGHT
|
||||
|
@ -94,7 +94,8 @@ static struct callback_ent {
|
||||
{ "escape-c", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
|
||||
{ "escape-xml", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
|
||||
{ "escape-xml-dec", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
|
||||
{ "escape-xml-hex", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX }
|
||||
{ "escape-xml-hex", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
|
||||
{ "escape-codepoint", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_CODEPOINT, UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_CODEPOINT }
|
||||
};
|
||||
|
||||
static const struct callback_ent *findCallback(const char *name) {
|
||||
@ -324,6 +325,8 @@ static UBool convertFile(const char* fromcpage,
|
||||
const size_t readsize = buffsize-1;
|
||||
char* buff = 0;
|
||||
|
||||
uint32_t foffset = 0; /* Where we are in the file, for error reporting. */
|
||||
|
||||
UConverterFromUCallback oldfromucallback;
|
||||
UConverterToUCallback oldtoucallback;
|
||||
const void *oldcontext;
|
||||
@ -351,7 +354,9 @@ static UBool convertFile(const char* fromcpage,
|
||||
}
|
||||
|
||||
// Create codepage converter. If the codepage or its aliases weren't
|
||||
// available, it returns NULL and a failure code
|
||||
// available, it returns NULL and a failure code. We also set the
|
||||
// callbacks, and return errors in the same way.
|
||||
|
||||
convfrom = ucnv_open(fromcpage, &err);
|
||||
if (U_FAILURE(err))
|
||||
{
|
||||
@ -385,10 +390,13 @@ static UBool convertFile(const char* fromcpage,
|
||||
// To ensure that the buffer always is of enough size, we
|
||||
// must take the worst case scenario, that is the character in the codepage
|
||||
// that uses the most bytes and multiply it against the buffsize
|
||||
|
||||
totbuffsize = buffsize * ucnv_getMaxCharSize(convto);
|
||||
buff = new char[totbuffsize];
|
||||
unibuff = new UChar[buffsize];
|
||||
|
||||
|
||||
// OK, we can convert now.
|
||||
|
||||
do
|
||||
{
|
||||
rd = fread(buff, 1, readsize, infile);
|
||||
@ -412,7 +420,9 @@ static UBool convertFile(const char* fromcpage,
|
||||
cbuffiter = buff;
|
||||
flush = rd!=readsize;
|
||||
ucnv_toUnicode(convfrom, &uniiter, uniiter + buffsize, &cbuffiter, cbuffiter + rd, 0, flush, &err);
|
||||
|
||||
|
||||
foffset += uniiter - unibuff;
|
||||
|
||||
if (U_FAILURE(err))
|
||||
{
|
||||
u_wmsg("problemCvtToU", u_wmsg_errorName(err));
|
||||
@ -517,29 +527,35 @@ static void usage(const char *pname, int ecode)
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
FILE* file = 0;
|
||||
FILE* infile;
|
||||
FILE* infile, *outfile;
|
||||
int ret = 0;
|
||||
|
||||
const char* fromcpage = 0;
|
||||
const char* tocpage = 0;
|
||||
const char *translit = 0;
|
||||
const char* infilestr = 0;
|
||||
const char* outfilestr = 0;
|
||||
|
||||
UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_SUBSTITUTE;
|
||||
UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
|
||||
const void *fromuctxt = 0;
|
||||
UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_SUBSTITUTE;
|
||||
UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
|
||||
const void *touctxt = 0;
|
||||
|
||||
char** iter = argv+1;
|
||||
char** end = argv+argc;
|
||||
|
||||
const char *pname = *argv;
|
||||
const char *pname;
|
||||
|
||||
int printConvs = 0, printCanon = 0;
|
||||
const char *printName = 0;
|
||||
int printTranslits = 0;
|
||||
|
||||
int silent = 0, verbose = 0;
|
||||
|
||||
// Prettify pname.
|
||||
for (pname = *argv + strlen(*argv) - 1; pname != *argv && *pname != U_FILE_SEP_CHAR; --pname);
|
||||
if (*pname == U_FILE_SEP_CHAR) ++pname;
|
||||
|
||||
// First, get the arguments from command-line
|
||||
// to know the codepages to convert between
|
||||
for (; iter!=end; iter++)
|
||||
@ -651,7 +667,40 @@ int main(int argc, char** argv)
|
||||
else if (!strcmp("-i", *iter)) {
|
||||
toucallback = UCNV_TO_U_CALLBACK_SKIP;
|
||||
}
|
||||
else if (**iter == '-' && (*iter)[1]) {
|
||||
else if (!strcmp("--callback", *iter)) {
|
||||
iter++;
|
||||
if (iter!=end) {
|
||||
const struct callback_ent *cbe = findCallback(*iter);
|
||||
if (cbe) {
|
||||
fromucallback = cbe->fromu;
|
||||
fromuctxt = cbe->fromuctxt;
|
||||
toucallback = cbe->tou;
|
||||
touctxt = cbe->touctxt;
|
||||
} else {
|
||||
UnicodeString str(*iter);
|
||||
initMsg(pname);
|
||||
u_wmsg("unknownCallback", str.getBuffer());
|
||||
return 4;
|
||||
}
|
||||
} else {
|
||||
usage(pname, 1);
|
||||
}
|
||||
}
|
||||
else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
|
||||
silent = 1;
|
||||
} else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
|
||||
verbose = 1;
|
||||
} else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
|
||||
printf("%s v2.0\n", pname);
|
||||
return 0;
|
||||
} else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
|
||||
++iter;
|
||||
if (iter != end && !outfilestr) {
|
||||
outfilestr = *iter;
|
||||
} else {
|
||||
usage(pname, 1);
|
||||
}
|
||||
} else if (**iter == '-' && (*iter)[1]) {
|
||||
usage(pname, 1);
|
||||
} else if (!infilestr) {
|
||||
infilestr = *iter;
|
||||
@ -689,8 +738,8 @@ int main(int argc, char** argv)
|
||||
// Open the correct input file or connect to stdin for reading input
|
||||
if (infilestr!=0 && strcmp(infilestr, "-"))
|
||||
{
|
||||
file = fopen(infilestr, "rb");
|
||||
if (file==0)
|
||||
infile = fopen(infilestr, "rb");
|
||||
if (infile==0)
|
||||
{
|
||||
UnicodeString str1(infilestr,"");
|
||||
UnicodeString str2(strerror(errno),"");
|
||||
@ -700,9 +749,9 @@ int main(int argc, char** argv)
|
||||
str2.getBuffer());
|
||||
return 1;
|
||||
}
|
||||
infile = file;
|
||||
}
|
||||
else {
|
||||
infilestr = "-";
|
||||
infile = stdin;
|
||||
#ifdef WIN32
|
||||
if( setmode( fileno ( stdin ), O_BINARY ) == -1 ) {
|
||||
@ -711,15 +760,38 @@ int main(int argc, char** argv)
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Open the correct output file or connect to stdout for writing output
|
||||
if (outfilestr!=0 && strcmp(outfilestr, "-"))
|
||||
{
|
||||
outfile = fopen(outfilestr, "wb");
|
||||
if (outfile==0)
|
||||
{
|
||||
UnicodeString str1(outfilestr,"");
|
||||
UnicodeString str2(strerror(errno),"");
|
||||
initMsg(pname);
|
||||
u_wmsg("cantCreateOutputF",
|
||||
str1.getBuffer(),
|
||||
str2.getBuffer());
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
outfilestr = "-";
|
||||
outfile = stdout;
|
||||
#ifdef WIN32
|
||||
if( setmode( fileno ( stdout ), O_BINARY ) == -1 ) {
|
||||
perror ( "Cannot set stdout to binary mode" );
|
||||
exit(-1);
|
||||
}
|
||||
if( setmode( fileno ( outfile ), O_BINARY ) == -1 ) {
|
||||
perror ( "Cannot set output file to binary mode" );
|
||||
exit(-1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
initMsg(pname);
|
||||
if (!convertFile(fromcpage, toucallback, touctxt, tocpage, fromucallback, fromuctxt, translit, infile, stdout))
|
||||
|
||||
if (verbose) {
|
||||
fprintf(stderr, "%s:\n", infilestr);
|
||||
}
|
||||
if (!convertFile(fromcpage, toucallback, touctxt, tocpage, fromucallback, fromuctxt, translit, infile, outfile))
|
||||
goto error_exit;
|
||||
|
||||
goto normal_exit;
|
||||
@ -727,8 +799,10 @@ int main(int argc, char** argv)
|
||||
ret = 1;
|
||||
normal_exit:
|
||||
|
||||
if (file!=0)
|
||||
fclose(file);
|
||||
if (infile!=stdin)
|
||||
fclose(infile);
|
||||
if (outfile != stdout) fclose(outfile);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user