ICU-12766 cleanup and add test case for escaper
X-SVN-Rev: 39810
This commit is contained in:
parent
803210153a
commit
383d3eead1
3
.gitattributes
vendored
3
.gitattributes
vendored
@ -124,6 +124,9 @@ icu4c/source/test/testdata/importtest.bin -text
|
||||
icu4c/source/test/testdata/old_e_testtypes.res -text
|
||||
icu4c/source/test/testdata/old_l_testtypes.res -text
|
||||
icu4c/source/test/testdata/uni-text.bin -text
|
||||
icu4c/source/tools/escapesrc/expect-simple.cpp -text
|
||||
icu4c/source/tools/escapesrc/test-nochange.cpp -text
|
||||
icu4c/source/tools/escapesrc/test-simple.cpp -text
|
||||
icu4c/source/tools/genbrk/genbrk.vcxproj -text
|
||||
icu4c/source/tools/genccode/genccode.vcxproj -text
|
||||
icu4c/source/tools/gencfu/gencfu.vcxproj -text
|
||||
|
@ -140,7 +140,10 @@ $(LIBDIR) $(BINDIR):
|
||||
|
||||
## Recursive targets
|
||||
all-recursive install-recursive clean-recursive distclean-recursive dist-recursive check-recursive check-exhaustive-recursive: $(LIBDIR) $(BINDIR)
|
||||
ifneq ($(NEED_ESCAPING),)
|
||||
@echo "building tools/escapesrc (Needed for this platform with NEED_ESCAPING)"
|
||||
@(cd tools/escapesrc && $(MAKE) RECURSIVE=YES $$local_target) || exit
|
||||
endif
|
||||
@dot_seen=no; \
|
||||
target=`echo $@ | sed s/-recursive//`; \
|
||||
list='$(LOCAL_SUBDIRS)'; for subdir in $$list; do \
|
||||
|
@ -17,7 +17,7 @@ subdir = tools
|
||||
|
||||
SUBDIRS = toolutil ctestfw makeconv genrb genbrk \
|
||||
gencnval gensprep icuinfo genccode gencmn icupkg pkgdata \
|
||||
gentest gennorm2 gencfu gendict
|
||||
gentest gennorm2 gencfu gendict escapesrc
|
||||
|
||||
## List of phony targets
|
||||
.PHONY : all all-local all-recursive install install-local \
|
||||
|
@ -5,6 +5,7 @@
|
||||
## others. All Rights Reserved.
|
||||
## Steven R. Loomis
|
||||
|
||||
# To avoid recursion
|
||||
SKIP_ESCAPING=YES
|
||||
|
||||
## Source directory information
|
||||
@ -25,7 +26,7 @@ SECTION = 8
|
||||
#MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
|
||||
|
||||
## Extra files to remove for 'make clean'
|
||||
CLEANFILES = *~ $(DEPS) $(MAN_FILES)
|
||||
CLEANFILES = *~ $(DEPS) $(MAN_FILES) ./output-*.cpp
|
||||
|
||||
## Target information
|
||||
TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
|
||||
@ -74,6 +75,12 @@ distclean-local: clean-local
|
||||
$(RMV) Makefile
|
||||
|
||||
check-local: all-local
|
||||
@echo Testing test-nochange.cpp
|
||||
@$(INVOKE) $(TARGET) $(srcdir)/test-nochange.cpp ./output-nochange.cpp
|
||||
@-diff -I '#line.*' $(srcdir)/test-nochange.cpp ./output-nochange.cpp || (echo >&2 'warning: diff failed or not found' ; true)
|
||||
@echo Testing test-simple.cpp
|
||||
@$(INVOKE) $(TARGET) $(srcdir)/test-simple.cpp ./output-simple.cpp
|
||||
@-diff -I '#line.*' $(srcdir)/expect-simple.cpp ./output-simple.cpp || (echo >&2 'warning: diff failed or not found' ; true)
|
||||
|
||||
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
|
||||
cd $(top_builddir) \
|
||||
|
@ -6,6 +6,9 @@
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
// with caution:
|
||||
#include "unicode/utf8.h"
|
||||
@ -112,7 +115,6 @@ bool fixAt(std::string &linestr, size_t pos) {
|
||||
} else {
|
||||
// Proceed to decode utf-8
|
||||
const uint8_t *s = (const uint8_t*) (linestr.c_str());
|
||||
const uint8_t *b = s;
|
||||
int32_t i = pos;
|
||||
int32_t length = linestr.size();
|
||||
UChar32 c;
|
||||
@ -123,24 +125,22 @@ bool fixAt(std::string &linestr, size_t pos) {
|
||||
U8_NEXT(s, i, length, c);
|
||||
}
|
||||
if(c<0) {
|
||||
fprintf(stderr, "Illegal utf-8 sequence %04X pos %d\n", c, pos);
|
||||
fprintf(stderr, "Illegal utf-8 sequence\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t seqLen = (i-pos);
|
||||
|
||||
printf("U+%04X pos %d [len %d]\n", c, pos, seqLen);
|
||||
//printf("U+%04X pos %d [len %d]\n", c, pos, seqLen);
|
||||
|
||||
char newSeq[] = "\\U0000FFFD";
|
||||
if( c <= 0xFFFF) {
|
||||
char newSeq[] = "\\uFFFD";
|
||||
sprintf(newSeq, "\\u%04X", c);
|
||||
linestr.replace(pos, seqLen, newSeq);
|
||||
//pos += seqLen; // advance
|
||||
pos += strlen(newSeq) - 1;
|
||||
} else {
|
||||
fprintf(stderr, "%s: Error: not implemented yet: surrogate pairs for U+%04X\n", prog.c_str(), c);
|
||||
return true;
|
||||
sprintf(newSeq, "\\U%08X", c);
|
||||
}
|
||||
linestr.replace(pos, seqLen, newSeq);
|
||||
pos += strlen(newSeq) - 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -151,91 +151,85 @@ bool fixAt(std::string &linestr, size_t pos) {
|
||||
* false = no err
|
||||
* true = had err
|
||||
*/
|
||||
bool fixLine(int no, std::string &linestr) {
|
||||
bool fixLine(int /*no*/, std::string &linestr) {
|
||||
const char *line = linestr.c_str();
|
||||
size_t len = linestr.size();
|
||||
// Quick Check: all ascii?
|
||||
|
||||
// no u' in the line?
|
||||
if(!strstr(line, "u'") && !strstr(line, "u\"")) {
|
||||
return false; // Nothing to do. No u' or u" detected
|
||||
}
|
||||
|
||||
// Quick Check: all ascii?
|
||||
if(!hasNonAscii(line, len)) {
|
||||
return false; // ASCII
|
||||
}
|
||||
|
||||
// comment or empty line?
|
||||
if(isCommentOrEmpty(line, len)) {
|
||||
return false; // Comment or just empty
|
||||
}
|
||||
|
||||
if(!strnstr(line, "u'", len) && !strnstr(line, "u\"", len)) {
|
||||
return false; // Nothing to do. No u' or u" detected
|
||||
}
|
||||
|
||||
// start from the end and find all u" cases
|
||||
size_t pos = len = linestr.size();
|
||||
while((pos = linestr.rfind("u\"", pos)) != std::string::npos) {
|
||||
printf("found doublequote at %d\n", pos);
|
||||
while((pos>0) && (pos = linestr.rfind("u\"", pos)) != std::string::npos) {
|
||||
//printf("found doublequote at %d\n", pos);
|
||||
if(fixAt(linestr, pos)) return true;
|
||||
if(pos == 0) break;
|
||||
pos--;
|
||||
}
|
||||
|
||||
// reset and find all u' cases
|
||||
pos = len = linestr.size();
|
||||
while((pos = linestr.rfind("u'", pos)) != std::string::npos) {
|
||||
printf("found singlequote at %d\n", pos);
|
||||
while((pos>0) && (pos = linestr.rfind("u'", pos)) != std::string::npos) {
|
||||
//printf("found singlequote at %d\n", pos);
|
||||
if(fixAt(linestr, pos)) return true;
|
||||
if(pos == 0) break;
|
||||
pos--;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%d - fixed\n", no);
|
||||
//fprintf(stderr, "%d - fixed\n", no);
|
||||
return false;
|
||||
}
|
||||
|
||||
int convert(const std::string &infile, const std::string &outfile) {
|
||||
fprintf(stderr, "%s: %s -> %s\n", prog.c_str(), infile.c_str(), outfile.c_str());
|
||||
fprintf(stderr, "escapesrc: %s -> %s\n", infile.c_str(), outfile.c_str());
|
||||
|
||||
FILE *inf = fopen(infile.c_str(), "rb");
|
||||
if(!inf) {
|
||||
std::ifstream inf;
|
||||
|
||||
inf.open(infile, std::ios::in);
|
||||
|
||||
if(!inf.is_open()) {
|
||||
fprintf(stderr, "%s: could not open input file %s\n", prog.c_str(), infile.c_str());
|
||||
cleanup(outfile);
|
||||
return 1;
|
||||
}
|
||||
|
||||
FILE *outf = fopen(outfile.c_str(), "w");
|
||||
std::ofstream outf;
|
||||
|
||||
if(!outf) {
|
||||
outf.open(outfile, std::ios::out);
|
||||
|
||||
if(!outf.is_open()) {
|
||||
fprintf(stderr, "%s: could not open output file %s\n", prog.c_str(), outfile.c_str());
|
||||
fclose(inf);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// TODO: any platform variations of this?
|
||||
fprintf(outf, "#line 1 \"%s\"\n", infile.c_str());
|
||||
// TODO: any platform variations of #line?
|
||||
outf << "#line 1 \"" << infile << "\"" << '\n';
|
||||
|
||||
size_t len;
|
||||
char *line;
|
||||
int no = 0;
|
||||
std::string linestr;
|
||||
while((line = fgetln(inf, &len))!= NULL) {
|
||||
while( getline( inf, linestr)) {
|
||||
no++;
|
||||
linestr.assign(line, len);
|
||||
if(fixLine(no, linestr)) {
|
||||
fclose(inf);
|
||||
fclose(outf);
|
||||
outf.close();
|
||||
fprintf(stderr, "%s:%d: Fixup failed by %s\n", infile.c_str(), no, prog.c_str());
|
||||
cleanup(outfile);
|
||||
return 1;
|
||||
}
|
||||
len = linestr.size(); // size may have changed.
|
||||
|
||||
if(fwrite(linestr.c_str(), 1, linestr.size(), outf) != len) {
|
||||
fclose(inf);
|
||||
fclose(outf);
|
||||
fprintf(stderr, "%s: short write to %s:%d\n", prog.c_str(), outfile.c_str(), no);
|
||||
cleanup(outfile);
|
||||
return 1;
|
||||
}
|
||||
outf << linestr << '\n';
|
||||
}
|
||||
|
||||
fclose(inf);
|
||||
fclose(outf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
10
icu4c/source/tools/escapesrc/expect-simple.cpp
Normal file
10
icu4c/source/tools/escapesrc/expect-simple.cpp
Normal file
@ -0,0 +1,10 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
u"sa\u0127\u0127a";
|
||||
u'\u6587';
|
||||
u"\U000219F2";
|
||||
|
||||
u"sa\u0127\u0127a";
|
||||
u'\u6587'; u"\U000219F2";
|
||||
|
5
icu4c/source/tools/escapesrc/test-nochange.cpp
Normal file
5
icu4c/source/tools/escapesrc/test-nochange.cpp
Normal file
@ -0,0 +1,5 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
// This is a source file with no changes needed in it.
|
||||
// In fact, the only non-ASCII character is in the comment line at top.
|
10
icu4c/source/tools/escapesrc/test-simple.cpp
Normal file
10
icu4c/source/tools/escapesrc/test-simple.cpp
Normal file
@ -0,0 +1,10 @@
|
||||
// © 2016 and later: Unicode, Inc. and others.
|
||||
// License & terms of use: http://www.unicode.org/copyright.html
|
||||
|
||||
u"saħħa";
|
||||
u'文';
|
||||
u"𡧲";
|
||||
|
||||
u"saħħa";
|
||||
u'文'; u"𡧲";
|
||||
|
Loading…
Reference in New Issue
Block a user