ICU-12766 escaping: merge from branch

* passes on AIX and Solaris
* auto-escape source code on these platforms

X-SVN-Rev: 39837
This commit is contained in:
Steven R. Loomis 2017-03-16 19:01:12 +00:00
commit b47a86f204
27 changed files with 631 additions and 57 deletions

3
.gitattributes vendored
View File

@ -124,6 +124,9 @@ icu4c/source/test/testdata/importtest.bin -text
icu4c/source/test/testdata/old_e_testtypes.res -text
icu4c/source/test/testdata/old_l_testtypes.res -text
icu4c/source/test/testdata/uni-text.bin -text
icu4c/source/tools/escapesrc/expect-simple.cpp -text
icu4c/source/tools/escapesrc/test-nochange.cpp -text
icu4c/source/tools/escapesrc/test-simple.cpp -text
icu4c/source/tools/genbrk/genbrk.vcxproj -text
icu4c/source/tools/genccode/genccode.vcxproj -text
icu4c/source/tools/gencfu/gencfu.vcxproj -text

View File

@ -194,7 +194,7 @@ EXPAND_ONLY_PREDEF = YES
SEARCH_INCLUDES = YES
INCLUDE_PATH =
INCLUDE_FILE_PATTERNS =
PREDEFINED = U_EXPORT2= U_STABLE= U_DRAFT= U_INTERNAL= U_SYSTEM= U_DEPRECATED= U_OBSOLETE= U_CALLCONV= U_CDECL_BEGIN= U_CDECL_END= U_NO_THROW=\ "U_NAMESPACE_BEGIN=namespace icu{" "U_NAMESPACE_END=}" U_SHOW_CPLUSPLUS_API=1 U_DEFINE_LOCAL_OPEN_POINTER()= U_IN_DOXYGEN=1 U_OVERRIDE= U_FINAL= UCONFIG_ENABLE_PLUGINS=1
PREDEFINED = U_EXPORT2= U_STABLE= U_DRAFT= U_INTERNAL= U_SYSTEM= U_DEPRECATED= U_OBSOLETE= U_CALLCONV= U_CDECL_BEGIN= U_CDECL_END= U_NO_THROW=\ "U_NAMESPACE_BEGIN=namespace icu{" "U_NAMESPACE_END=}" U_SHOW_CPLUSPLUS_API=1 U_DEFINE_LOCAL_OPEN_POINTER()= U_IN_DOXYGEN=1 U_OVERRIDE= U_FINAL= UCONFIG_ENABLE_PLUGINS=1 U_CHAR16_IS_TYPEDEF=0 U_CPLUSPLUS_VERSION=11
EXPAND_AS_DEFINED =
SKIP_FUNCTION_MACROS = YES
#---------------------------------------------------------------------------

View File

@ -140,6 +140,10 @@ $(LIBDIR) $(BINDIR):
## Recursive targets
all-recursive install-recursive clean-recursive distclean-recursive dist-recursive check-recursive check-exhaustive-recursive: $(LIBDIR) $(BINDIR)
ifneq ($(NEED_ESCAPING),)
@echo "building tools/escapesrc (Needed for this platform with NEED_ESCAPING)"
@(cd tools/escapesrc && $(MAKE) RECURSIVE=YES $$local_target) || exit
endif
@dot_seen=no; \
target=`echo $@ | sed s/-recursive//`; \
list='$(LOCAL_SUBDIRS)'; for subdir in $$list; do \

View File

@ -42,12 +42,14 @@ public:
* @draft ICU 59
*/
inline Char16Ptr(char16_t *p);
#if !U_CHAR16_IS_TYPEDEF
/**
* Converts the pointer to char16_t *.
* @param p pointer to be converted
* @draft ICU 59
*/
inline Char16Ptr(uint16_t *p);
#endif
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
* Converts the pointer to char16_t *.
@ -104,7 +106,9 @@ private:
#ifdef U_ALIASING_BARRIER
Char16Ptr::Char16Ptr(char16_t *p) : p(p) {}
#if !U_CHAR16_IS_TYPEDEF
Char16Ptr::Char16Ptr(uint16_t *p) : p(cast(p)) {}
#endif
#if U_SIZEOF_WCHAR_T==2
Char16Ptr::Char16Ptr(wchar_t *p) : p(cast(p)) {}
#endif
@ -118,7 +122,9 @@ char16_t *Char16Ptr::get() const { return p; }
#else
Char16Ptr::Char16Ptr(char16_t *p) { u.cp = p; }
#if !U_CHAR16_IS_TYPEDEF
Char16Ptr::Char16Ptr(uint16_t *p) { u.up = p; }
#endif
#if U_SIZEOF_WCHAR_T==2
Char16Ptr::Char16Ptr(wchar_t *p) { u.wp = p; }
#endif
@ -141,12 +147,14 @@ public:
* @draft ICU 59
*/
inline ConstChar16Ptr(const char16_t *p);
#if !U_CHAR16_IS_TYPEDEF
/**
* Converts the pointer to char16_t *.
* @param p pointer to be converted
* @draft ICU 59
*/
inline ConstChar16Ptr(const uint16_t *p);
#endif
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
* Converts the pointer to char16_t *.
@ -162,6 +170,7 @@ public:
* @draft ICU 59
*/
inline ConstChar16Ptr(const std::nullptr_t p);
/**
* Destructor.
* @draft ICU 59
@ -203,7 +212,9 @@ private:
#ifdef U_ALIASING_BARRIER
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p(p) {}
#if !U_CHAR16_IS_TYPEDEF
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p(cast(p)) {}
#endif
#if U_SIZEOF_WCHAR_T==2
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p(cast(p)) {}
#endif
@ -217,7 +228,9 @@ const char16_t *ConstChar16Ptr::get() const { return p; }
#else
ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u.cp = p; }
#if !U_CHAR16_IS_TYPEDEF
ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u.up = p; }
#endif
#if U_SIZEOF_WCHAR_T==2
ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u.wp = p; }
#endif

View File

@ -486,6 +486,13 @@
# define U_CPLUSPLUS_VERSION 1
#endif
#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
// add in std::nullptr_t
namespace std {
typedef decltype(nullptr) nullptr_t;
};
#endif
/**
* \def U_HAVE_RVALUE_REFERENCES
* Set to 1 if the compiler supports rvalue references.

View File

@ -291,10 +291,16 @@ typedef int8_t UBool;
#define U_SIZEOF_UCHAR 2
/**
* for AIX, uchar.h needs to be included
* \def U_CHAR16_IS_TYPEDEF
* If 1, then char16_t is a typedef and not a real type (yet)
* @internal
*/
#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
// for AIX, uchar.h needs to be included
# include <uchar.h>
# define U_CHAR16_IS_TYPEDEF 1
#else
# define U_CHAR16_IS_TYPEDEF 0
#endif

View File

@ -119,7 +119,11 @@ class UnicodeStringAppendable; // unicode/appendable.h
* <code>NUL</code>, must be specified as a constant.
* @stable ICU 2.0
*/
#define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length)
#if !U_CHAR16_IS_TYPEDEF
# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length)
#else
# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const char16_t*)u ## cs, _length)
#endif
/**
* Unicode String literals in C++.
@ -3002,6 +3006,7 @@ public:
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
#if !U_CHAR16_IS_TYPEDEF
/**
* uint16_t * constructor.
* Delegates to UnicodeString(const char16_t *).
@ -3014,6 +3019,7 @@ public:
*/
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
UnicodeString(ConstChar16Ptr(text)) {}
#endif
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
@ -3053,6 +3059,7 @@ public:
UnicodeString(const char16_t *text,
int32_t textLength);
#if !U_CHAR16_IS_TYPEDEF
/**
* uint16_t * constructor.
* Delegates to UnicodeString(const char16_t *, int32_t).
@ -3062,6 +3069,7 @@ public:
*/
UnicodeString(const uint16_t *text, int32_t length) :
UnicodeString(ConstChar16Ptr(text), length) {}
#endif
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**
@ -3131,6 +3139,7 @@ public:
*/
UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
#if !U_CHAR16_IS_TYPEDEF
/**
* Writable-aliasing uint16_t * constructor.
* Delegates to UnicodeString(const char16_t *, int32_t, int32_t).
@ -3141,6 +3150,7 @@ public:
*/
UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
#endif
#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
/**

View File

@ -19,7 +19,7 @@ include $(top_builddir)/icudefs.mk
DISTY_TMP=dist/tmp
DISTY_ICU=$(DISTY_TMP)/icu
DISTY_DATA=$(DISTY_ICU)/source/data
DISTY_RMV=brkitr coll curr lang locales mappings rbnf region translit xml zone misc sprep unit
DISTY_RMV=brkitr coll curr lang locales mappings rbnf region translit xml zone misc unit
DISTY_RMDIR=$(DISTY_RMV:%=$(DISTY_DATA)/%)
DISTY_IN=$(DISTY_DATA)/in
DOCZIP=icu-docs.zip

View File

@ -111,11 +111,31 @@ $(LIBDIR)/%.a : %.so
%.o: $(srcdir)/%.c
$(COMPILE.c) $(DYNAMICCPPFLAGS) $(DYNAMICCFLAGS) -o $@ $<
%.$(STATIC_O): $(srcdir)/%.cpp
$(COMPILE.cc) $(STATICCPPFLAGS) $(STATICCXXFLAGS) -o $@ $<
## C++ compilation rules.
# This causes escapesrc to be built before other ICU targets.
NEED_ESCAPING=YES
ifneq ($(SKIP_ESCAPING),)
# no escaping - bootstrap
%.o: $(srcdir)/%.cpp
$(COMPILE.cc) $(DYNAMICCPPFLAGS) $(DYNAMICCXXFLAGS) -o $@ $<
else
# convert *.cpp files to _*.cpp with \u / \U escaping
CLEANFILES += _*.cpp
# the actual escaping
_%.cpp: $(srcdir)/%.cpp
@$(BINDIR)/escapesrc$(EXEEXT) $< $@
# compilation for static obj
%.$(STATIC_O): _%.cpp
$(COMPILE.cc) $(STATICCPPFLAGS) $(STATICCXXFLAGS) -o $@ $<
# compilation for dynamic obj
%.o: _%.cpp
$(COMPILE.cc) $(DYNAMICCPPFLAGS) $(DYNAMICCXXFLAGS) -o $@ $<
endif
## Dependency rules
%.d : %.u

View File

@ -63,17 +63,34 @@ SO= so
## Non-shared intermediate object suffix
STATIC_O = o
# This causes escapesrc to be built before other ICU targets.
NEED_ESCAPING=YES
## Compilation rules
%.$(STATIC_O): $(srcdir)/%.c
$(COMPILE.c) $(STATICCPPFLAGS) $(STATICCFLAGS) -o $@ $<
%.o: $(srcdir)/%.c
$(COMPILE.c) $(DYNAMICCPPFLAGS) $(DYNAMICCFLAGS) -o $@ $<
%.$(STATIC_O): $(srcdir)/%.cpp
$(COMPILE.cc) $(STATICCPPFLAGS) $(STATICCXXFLAGS) -o $@ $<
ifneq ($(SKIP_ESCAPING),)
%.o: $(srcdir)/%.cpp
$(COMPILE.cc) $(DYNAMICCPPFLAGS) $(DYNAMICCXXFLAGS) -o $@ $<
%.$(STATIC_O): $(srcdir)/%.cpp
$(COMPILE.cc) $(STATICCPPFLAGS) $(STATICCXXFLAGS) -o $@ $<
else
# convert *.cpp files to _*.cpp with \u / \U escaping
CLEANFILES += _*.cpp
# the actual escaping
_%.cpp: $(srcdir)/%.cpp
@$(BINDIR)/escapesrc$(EXEEXT) $< $@
# compilation of the escaped _*.cpp (static and dynamic obj)
%.$(STATIC_O): _%.cpp
$(COMPILE.cc) $(STATICCPPFLAGS) $(STATICCXXFLAGS) -o $@ $<
%.o: _%.cpp
$(COMPILE.cc) $(DYNAMICCPPFLAGS) $(DYNAMICCXXFLAGS) -o $@ $<
endif
## Dependency rules
%.d : $(srcdir)/%.c

View File

@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for ICU 59.1
# Generated by GNU Autoconf 2.69 for ICU 59.1.
#
# Report bugs to <http://icu-project.org/bugs>.
#
@ -674,7 +674,6 @@ GENCCODE_ASSEMBLY
HAVE_MMAP
LIB_THREAD
U_HAVE_ATOMIC
U_HAVE_STD_STRING
ENABLE_RPATH
U_ENABLE_DYLOAD
U_HAVE_PLUGINS
@ -5958,37 +5957,6 @@ $as_echo "$as_me: Adding CXXFLAGS option -std=c++11" >&6;}
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if #include <string> works" >&5
$as_echo_n "checking if #include <string> works... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <string>
int
main ()
{
;
return 0;
}
_ACEOF
if ac_fn_cxx_try_compile "$LINENO"; then :
ac_cv_header_stdstring=yes
else
ac_cv_header_stdstring=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdstring" >&5
$as_echo "$ac_cv_header_stdstring" >&6; }
if test $ac_cv_header_stdstring = yes
then
U_HAVE_STD_STRING=1
else
U_HAVE_STD_STRING=0
CONFIG_CPPFLAGS="${CONFIG_CPPFLAGS} -DU_HAVE_STD_STRING=0"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if #include <atomic> works" >&5
$as_echo_n "checking if #include <atomic> works... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@ -7800,7 +7768,7 @@ echo "CXXFLAGS=$CXXFLAGS"
# output the Makefiles
ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/icu.pc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/gendict/Makefile tools/gentest/Makefile tools/gennorm2/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icuinfo/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/tzcode/Makefile tools/gencfu/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/collperf/Makefile test/perf/collperf2/Makefile test/perf/dicttrieperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/normperf/Makefile test/perf/DateFmtPerf/Makefile test/perf/howExpensiveIs/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile test/perf/leperf/Makefile samples/Makefile samples/date/Makefile samples/cal/Makefile samples/layout/Makefile"
ac_config_files="$ac_config_files icudefs.mk Makefile data/pkgdataMakefile config/Makefile.inc config/icu.pc config/pkgdataMakefile data/Makefile stubdata/Makefile common/Makefile i18n/Makefile layoutex/Makefile io/Makefile extra/Makefile extra/uconv/Makefile extra/uconv/pkgdataMakefile extra/scrptrun/Makefile tools/Makefile tools/ctestfw/Makefile tools/toolutil/Makefile tools/makeconv/Makefile tools/genrb/Makefile tools/genccode/Makefile tools/gencmn/Makefile tools/gencnval/Makefile tools/gendict/Makefile tools/gentest/Makefile tools/gennorm2/Makefile tools/genbrk/Makefile tools/gensprep/Makefile tools/icuinfo/Makefile tools/icupkg/Makefile tools/icuswap/Makefile tools/pkgdata/Makefile tools/tzcode/Makefile tools/gencfu/Makefile tools/escapesrc/Makefile test/Makefile test/compat/Makefile test/testdata/Makefile test/testdata/pkgdataMakefile test/hdrtst/Makefile test/intltest/Makefile test/cintltst/Makefile test/iotest/Makefile test/letest/Makefile test/perf/Makefile test/perf/collationperf/Makefile test/perf/collperf/Makefile test/perf/collperf2/Makefile test/perf/dicttrieperf/Makefile test/perf/ubrkperf/Makefile test/perf/charperf/Makefile test/perf/convperf/Makefile test/perf/normperf/Makefile test/perf/DateFmtPerf/Makefile test/perf/howExpensiveIs/Makefile test/perf/strsrchperf/Makefile test/perf/unisetperf/Makefile test/perf/usetperf/Makefile test/perf/ustrperf/Makefile test/perf/utfperf/Makefile test/perf/utrie2perf/Makefile test/perf/leperf/Makefile samples/Makefile samples/date/Makefile samples/cal/Makefile samples/layout/Makefile"
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
@ -8545,6 +8513,7 @@ do
"tools/pkgdata/Makefile") CONFIG_FILES="$CONFIG_FILES tools/pkgdata/Makefile" ;;
"tools/tzcode/Makefile") CONFIG_FILES="$CONFIG_FILES tools/tzcode/Makefile" ;;
"tools/gencfu/Makefile") CONFIG_FILES="$CONFIG_FILES tools/gencfu/Makefile" ;;
"tools/escapesrc/Makefile") CONFIG_FILES="$CONFIG_FILES tools/escapesrc/Makefile" ;;
"test/Makefile") CONFIG_FILES="$CONFIG_FILES test/Makefile" ;;
"test/compat/Makefile") CONFIG_FILES="$CONFIG_FILES test/compat/Makefile" ;;
"test/testdata/Makefile") CONFIG_FILES="$CONFIG_FILES test/testdata/Makefile" ;;

View File

@ -44,7 +44,7 @@ CLEANFILES = *~ $(DEPS) $(ALL_MAN_FILES)
## Target information
TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
CPPFLAGS += -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(srcdir)/../toolutil
CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(srcdir)/../toolutil
CPPFLAGS += -DUNISTR_FROM_CHAR_EXPLICIT=explicit -DUNISTR_FROM_STRING_EXPLICIT=explicit
LIBS = $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)

View File

@ -55,7 +55,7 @@ DYNAMICCXXFLAGS = $(SHAREDLIBCXXFLAGS)
CFLAGS += $(LIBCFLAGS)
CXXFLAGS += $(LIBCXXFLAGS)
CPPFLAGS += -I$(top_srcdir)/common -I$(top_srcdir)/i18n $(LIBCPPFLAGS) $(CPPFLAGSICUIO)
CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n $(LIBCPPFLAGS) $(CPPFLAGSICUIO)
DEFS += -DU_IO_IMPLEMENTATION
LDFLAGS += $(LDFLAGSICUIO)
LIBS = $(LIBICUUC) $(LIBICUI18N) $(DEFAULT_LIBS)

View File

@ -34,7 +34,7 @@ BUILDDIR := $(BUILDDIR:test\\intltest/../../=)
# Simplify the path for Windows 98
BUILDDIR := $(BUILDDIR:TEST\\INTLTEST/../../=)
CPPFLAGS += -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcdir)/tools/toolutil -I$(top_srcdir)/tools/ctestfw
CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcdir)/tools/toolutil -I$(top_srcdir)/tools/ctestfw
CPPFLAGS += -DUNISTR_FROM_CHAR_EXPLICIT= -DUNISTR_FROM_STRING_EXPLICIT= -DUCHAR_TYPE=char16_t
DEFS += -D'U_TOPSRCDIR="$(top_srcdir)/"' -D'U_TOPBUILDDIR="$(BUILDDIR)"'
LIBS = $(LIBCTESTFW) $(LIBICUI18N) $(LIBICUUC) $(LIBICUTOOLUTIL) $(DEFAULT_LIBS) $(LIB_M) $(LIB_THREAD)

View File

@ -1682,18 +1682,22 @@ void RBBITest::TestUnicodeFiles() {
UBool RBBITest::testCaseIsKnownIssue(const UnicodeString &testCase, const char *fileName) {
static const UChar *badTestCases[] = { // Line Numbers from Unicode 7.0.0 file.
u"\u200B\u0020\u007D", // Line 5198
u"\u200B\u0020\u0029", // Line 5202
u"\u200B\u0020\u0021", // Line 5214
u"\u200B\u0020\u002c", // Line 5246
u"\u200B\u0020\u002f", // Line 5298
u"\u200B\u0020}", // Line 5198
u"\u200B\u0020)", // Line 5202
u"\u200B\u0020!", // Line 5214
u"\u200B\u0020,", // Line 5246
u"\u200B\u0020/", // Line 5298
u"\u200B\u0020\u2060" // Line 5302
};
if (strcmp(fileName, "LineBreakTest.txt") != 0) {
return FALSE;
}
#if (U_CPLUSPLUS_VERSION >= 11)
for (const UChar *badCase: badTestCases) {
#else
for (const UChar *badCase = badTestCases[0]; badCase < badTestCases[UPRV_LENGTHOF(badTestCases)]; badCase++) {
#endif
if (testCase == UnicodeString(badCase)) {
return logKnownIssue("7270");
}

View File

@ -34,7 +34,7 @@ BUILDDIR := $(BUILDDIR:test\\iotest/../../=)
# Simplify the path for Windows 98
BUILDDIR := $(BUILDDIR:TEST\\IOTEST/../../=)
CPPFLAGS += -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcdir)/tools/ctestfw -I$(top_srcdir)/io
CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcdir)/tools/ctestfw -I$(top_srcdir)/io
CPPFLAGS += -DUNISTR_FROM_CHAR_EXPLICIT= -DUNISTR_FROM_STRING_EXPLICIT=
DEFS += -D'U_TOPSRCDIR="$(top_srcdir)/"' -D'U_TOPBUILDDIR="$(BUILDDIR)"'
LIBS = $(LIBCTESTFW) $(LIBICUTOOLUTIL) $(LIBICUIO) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)

View File

@ -17,7 +17,7 @@ subdir = tools
SUBDIRS = toolutil ctestfw makeconv genrb genbrk \
gencnval gensprep icuinfo genccode gencmn icupkg pkgdata \
gentest gennorm2 gencfu gendict
gentest gennorm2 gencfu gendict escapesrc
## List of phony targets
.PHONY : all all-local all-recursive install install-local \

View File

@ -0,0 +1,106 @@
## Makefile.in for ICU - tools/escapesrc
## Copyright (C) 2016 and later: Unicode, Inc. and others.
## License & terms of use: http://www.unicode.org/copyright.html
## Copyright (c) 1999-2011, International Business Machines Corporation and
## others. All Rights Reserved.
## Steven R. Loomis
# To avoid recursion
SKIP_ESCAPING=YES
## Source directory information
srcdir = @srcdir@
top_srcdir = @top_srcdir@
top_builddir = ../..
include $(top_builddir)/icudefs.mk
## Build directory information
subdir = tools/escapesrc
TARGET_STUB_NAME = escapesrc
SECTION = 8
#MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
## Extra files to remove for 'make clean'
CLEANFILES = *~ $(DEPS) $(MAN_FILES) ./output-*.cpp
## Target information
TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
CPPFLAGS += -I$(top_srcdir)/common -I$(srcdir)/../toolutil
#LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC)
LIBS += $(DEFAULT_LIBS) $(LIB_M)
OBJECTS = escapesrc.o
DEPS = $(OBJECTS:.o=.d)
## List of phony targets
.PHONY : all all-local install install-local clean clean-local \
distclean distclean-local dist dist-local check check-local install-man
## Clear suffix list
.SUFFIXES :
## List of standard targets
all: all-local
install: install-local
clean: clean-local
distclean : distclean-local
dist: dist-local
check: all check-local
all-local: $(TARGET) $(MAN_FILES)
install-local: all-local install-man
$(MKINSTALLDIRS) $(DESTDIR)$(sbindir)
$(INSTALL) $(TARGET) $(DESTDIR)$(sbindir)
install-man: $(MAN_FILES)
# $(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
# $(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
dist-local:
clean-local:
test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
$(RMV) $(TARGET) $(OBJECTS)
distclean-local: clean-local
$(RMV) Makefile
check-local: all-local
@echo Testing test-nochange.cpp
@$(INVOKE) $(TARGET) $(srcdir)/test-nochange.cpp ./output-nochange.cpp
@-diff -I '#line.*' $(srcdir)/test-nochange.cpp ./output-nochange.cpp || (echo >&2 'warning: diff failed or not found' ; true)
@echo Testing test-simple.cpp
@$(INVOKE) $(TARGET) $(srcdir)/test-simple.cpp ./output-simple.cpp
@-diff -I '#line.*' $(srcdir)/expect-simple.cpp ./output-simple.cpp || (echo >&2 'warning: diff failed or not found' ; true)
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
$(TARGET) : $(OBJECTS)
$(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
$(POST_BUILD_STEP)
%.$(SECTION): $(srcdir)/%.$(SECTION).in
cd $(top_builddir) \
&& CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
ifeq (,$(MAKECMDGOALS))
-include $(DEPS)
else
ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
-include $(DEPS)
endif
endif

View File

@ -0,0 +1,378 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
#include <stdio.h>
#include <string>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <iostream>
#include <fstream>
// with caution:
#include "unicode/utf8.h"
// Characters the escaper looks for, spelled as numeric (ASCII) code values
// rather than as character literals.
static const char kSPACE   = 0x20;
static const char kTAB     = 0x09;
static const char kLF      = 0x0A;
static const char kCR      = 0x0D;
static const char kBKSLASH = 0x5C;
static const char kL_U     = 0x75;  // lowercase u
static const char kU_U     = 0x55;  // uppercase U
static const char kQUOT    = 0x27;  // single quote
static const char kDBLQ    = 0x22;  // double quote
// Only referenced by the commented-out isCommentOrEmpty():
// static const char kHASH  = 0x23;
// static const char kSLASH = 0x2f;
// static const char kSTAR  = 0x2A;
// Program name used as a prefix in diagnostics.
std::string prog;

/** Print the command-line synopsis to stderr. */
void usage() {
  const char *self = prog.c_str();
  fprintf(stderr, "%s: usage: %s infile.cpp outfile.cpp\n", self, self);
}
/**
 * Delete an output file, e.g. one that was only partially written.
 * @param outfile path of the file to delete; an empty path is a no-op
 * @return 0 if the file was deleted or did not exist,
 *         1 if unlink() failed for any other reason
 */
int cleanup(const std::string &outfile) {
  const char *path = outfile.c_str();
  if(!path || !*path) {
    return 0;  // nothing to delete
  }
  if(unlink(path) == 0) {
    fprintf(stderr, "%s: deleted %s\n", prog.c_str(), path);
    return 0;
  }
  if(errno == ENOENT) {
    return 0;  // File did not exist - no error.
  }
  perror("unlink");
  return 1;
}
// inline bool hasNonAscii(const char *line, size_t len) {
// const unsigned char *uline = reinterpret_cast<const unsigned char*>(line);
// for(size_t i=0;i<len; i++) {
// if( uline[i] > 0x7F) {
// return true;
// }
// }
// return false;
// }
/**
 * Advance past space, tab, LF and CR characters.
 * @param p start of the range
 * @param e end of the range (exclusive)
 * @return pointer to the first non-whitespace character, or e if there is none
 */
inline const char *skipws(const char *p, const char *e) {
  while(p < e) {
    const char c = *p;
    if(c != kSPACE && c != kTAB && c != kLF && c != kCR) {
      break;  // non ws
    }
    ++p;
  }
  return p;
}
// inline bool isCommentOrEmpty(const char* line, size_t len) {
// const char *p = line;
// const char *e = line+len;
// p = skipws(p,e);
// if(p==e) {
// return true; // whitespace only
// }
// p++;
// switch(*p) {
// case kHASH: return true; // #directive
// case kSLASH:
// p++;
// if(p==e) return false; // single slash
// switch(*p) {
// case kSLASH: // '/ /'
// case kSTAR: // '/ *'
// return true; // start of comment
// default: return false; // something else
// }
// default: return false; // something else
// }
// /*NOTREACHED*/
// }
/**
 * Append one byte to outstr as a four-character hex escape: a backslash,
 * 'x', and two uppercase hex digits.
 * @param outstr string to append to
 * @param byte   value to spell out
 */
void appendByte(std::string &outstr,
                uint8_t byte) {
  static const char kHexDigits[] = "0123456789ABCDEF";
  char escaped[5];
  escaped[0] = '\\';
  escaped[1] = 'x';
  escaped[2] = kHexDigits[(byte >> 4) & 0x0F];
  escaped[3] = kHexDigits[byte & 0x0F];
  escaped[4] = 0;
  outstr += escaped;
}
/**
* @return true on failure
*/
bool appendUtf8(std::string &outstr,
const std::string &linestr,
size_t &pos,
size_t chars) {
char tmp[9];
for(size_t i=0;i<chars;i++) {
tmp[i] = linestr[++pos];
}
tmp[chars] = 0;
unsigned int c;
sscanf(tmp, "%X", &c);
UChar32 ch = c & 0x1FFFFF;
// now to append \\x%% etc
uint8_t bytesNeeded = U8_LENGTH(ch);
if(bytesNeeded == 0) {
fprintf(stderr, "Illegal code point U+%X\n", ch);
return true;
}
uint8_t bytes[4];
uint8_t *s = bytes;
size_t i = 0;
U8_APPEND_UNSAFE(s, i, ch);
for(size_t t = 0; t<i; t++) {
appendByte(outstr, s[t]);
}
return false;
}
/**
 * Rewrite one u8"..." literal in place as a plain "..." literal in which
 * every character is spelled as a \xNN byte escape and every \u / \U escape
 * is expanded to the \xNN escapes of its UTF-8 bytes.
 *
 * @param linestr string to mutate. Already escaped into \u format.
 * @param origpos beginning, points to 'u8"'
 * @param endpos  end, points to the closing " (not modified by this function)
 * @return false for no-problem, true for failure!
 */
bool fixu8(std::string &linestr, size_t origpos, size_t &endpos) {
  size_t pos = origpos + 3;  // first character after u8"
  std::string outstr;
  outstr += (kDBLQ);
  for(;pos<endpos;pos++) {
    char c = linestr[pos];
    if(c != kBKSLASH) {
      appendByte(outstr, c);
      continue;
    }
    char c2 = linestr[++pos];
    switch(c2) {
    case kQUOT:
    case kDBLQ:
      appendByte(outstr, c2);
      break;
    case kL_U:
      if(appendUtf8(outstr, linestr, pos, 4)) {
        return true;  // malformed \uXXXX: report it instead of emitting garbage
      }
      break;
    case kU_U:
      if(appendUtf8(outstr, linestr, pos, 8)) {
        return true;  // malformed \UXXXXXXXX
      }
      break;
    default:
      // Any other escape (\n, \t, \\, \0, \x41, \101, ...) is already plain
      // source text. Copy it through unchanged; dropping it would silently
      // change the contents of the literal.
      outstr += kBKSLASH;
      outstr += c2;
      if(c2 == 'x') {
        // The hex digits are part of the escape and must stay verbatim.
        while(pos + 1 < endpos) {
          const char d = linestr[pos + 1];
          const bool isHex = (d >= '0' && d <= '9') ||
                             (d >= 'a' && d <= 'f') ||
                             (d >= 'A' && d <= 'F');
          if(!isHex) break;
          outstr += d;
          pos++;
        }
      } else if(c2 >= '0' && c2 <= '7') {
        // Octal escape: up to two further octal digits belong to it.
        for(int extra = 0; extra < 2 && pos + 1 < endpos; extra++) {
          const char d = linestr[pos + 1];
          if(d < '0' || d > '7') break;
          outstr += d;
          pos++;
        }
      }
      break;
    }
  }
  outstr += (kDBLQ);

  // Replace everything from the 'u' through the closing quote.
  linestr.replace(origpos, (endpos-origpos+1), outstr);

  return false; // OK
}
/**
* Fix the string or character literal that starts at the given position.
*
* Non-ASCII UTF-8 sequences inside the literal are replaced in place by
* \uXXXX (code points up to U+FFFF) or \UXXXXXXXX escapes. If the literal is
* a u8"..." string, it is then handed to fixu8() once its closing quote is
* found.
*
* @param linestr line to mutate
* @param pos index of the 'u' that starts the literal (u", u' or u8")
* @return false = no err (including a literal with no closing quote on this
*         line), true = had err
*/
bool fixAt(std::string &linestr, size_t pos) {
size_t origpos = pos;
if(linestr[pos] != 'u') {
fprintf(stderr, "Not a 'u'?");
return true;
}
pos++; // past 'u'
bool utf8 = false;
if(linestr[pos] == '8') { // u8"
utf8 = true;
pos++;
}
// The prefix must be followed directly by the opening quote character.
char quote = linestr[pos];
if(quote != '\'' && quote != '\"') {
fprintf(stderr, "Quote is '%c' - not sure what to do.\n", quote);
return true;
}
// u8 character literals are rejected.
if(quote == '\'' && utf8) {
fprintf(stderr, "Cannot do u8'...'\n");
return true;
}
pos ++;
//printf("u%c…%c\n", quote, quote);
// Walk the literal's contents. linestr may grow while we do so, which is
// why the loop re-reads linestr.size() and pos is adjusted after a replace.
for(; pos < linestr.size(); pos++) {
if(linestr[pos] == quote) {
// Unescaped closing quote: the literal ends here.
if(utf8) {
return fixu8(linestr, origpos, pos); // fix u8"..."
} else {
return false; // end of quote
}
}
if(linestr[pos] == '\\') {
// Step onto the character after the backslash; together with the loop's
// own pos++ this skips it, so an escaped quote does not end the literal.
// All branches below end up doing the same thing.
pos++;
if(linestr[pos] == quote) continue; // quoted quote
if(linestr[pos] == 'u') continue; // for now ... unicode escape
if(linestr[pos] == '\\') continue;
// some other escape… ignore
} else {
// Proceed to decode utf-8
const uint8_t *s = (const uint8_t*) (linestr.c_str());
int32_t i = pos;
int32_t length = linestr.size();
UChar32 c;
// A byte that is a complete code point by itself is left as is.
if(U8_IS_SINGLE((uint8_t)s[i])) continue; // single code point
{
// Decodes one code point starting at pos; i is advanced past the sequence.
U8_NEXT(s, i, length, c);
}
if(c<0) {
fprintf(stderr, "Illegal utf-8 sequence\n");
return true;
}
// Number of bytes the decoded sequence occupied in linestr.
size_t seqLen = (i-pos);
//printf("U+%04X pos %d [len %d]\n", c, pos, seqLen);fflush(stdout);
char newSeq[20];
if( c <= 0xFFFF) {
sprintf(newSeq, "\\u%04X", c);
} else {
sprintf(newSeq, "\\U%08X", c);
}
// Swap the raw bytes for the escape, then leave pos on the escape's last
// character so that the loop's pos++ continues right after it.
linestr.replace(pos, seqLen, newSeq);
pos += strlen(newSeq) - 1;
}
}
// Reached the end of the line without finding a closing quote.
return false;
}
/**
 * Escape all u"...", u'...' and u8"..." literals on one source line.
 *
 * @param linestr the line; modified in place
 * @return false = no err, true = had err
 */
bool fixLine(int /*no*/, std::string &linestr) {
  const char *line = linestr.c_str();

  // Nothing to do unless at least one of the three prefixes occurs.
  if(!(strstr(line, "u'") || strstr(line, "u\"") || strstr(line, "u8\""))) {
    return false;
  }

  // There is deliberately no "line is all ASCII" shortcut here:
  // lines such as u8"\u0308" are all ASCII.

  // Prefixes are handled one after the other, in this order. For each one the
  // line is searched from the end towards the start.
  static const char * const kPrefixes[] = { "u\"", "u'", "u8\"" };
  const size_t kPrefixCount = sizeof(kPrefixes) / sizeof(kPrefixes[0]);

  for(size_t k = 0; k < kPrefixCount; k++) {
    size_t at = linestr.size();  // restart from the (possibly grown) end
    while(at > 0) {
      at = linestr.rfind(kPrefixes[k], at);
      if(at == std::string::npos) {
        break;  // no further occurrence of this prefix
      }
      if(fixAt(linestr, at)) {
        return true;
      }
      if(at == 0) {
        break;  // occurrence at the very start; nothing before it
      }
      at--;
    }
  }

  return false;
}
int convert(const std::string &infile, const std::string &outfile) {
fprintf(stderr, "escapesrc: %s -> %s\n", infile.c_str(), outfile.c_str());
std::ifstream inf;
inf.open(infile.c_str(), std::ios::in);
if(!inf.is_open()) {
fprintf(stderr, "%s: could not open input file %s\n", prog.c_str(), infile.c_str());
cleanup(outfile);
return 1;
}
std::ofstream outf;
outf.open(outfile.c_str(), std::ios::out);
if(!outf.is_open()) {
fprintf(stderr, "%s: could not open output file %s\n", prog.c_str(), outfile.c_str());
return 1;
}
// TODO: any platform variations of #line?
outf << "#line 1 \"" << infile << "\"" << '\n';
int no = 0;
std::string linestr;
while( getline( inf, linestr)) {
no++;
if(fixLine(no, linestr)) {
outf.close();
fprintf(stderr, "%s:%d: Fixup failed by %s\n", infile.c_str(), no, prog.c_str());
cleanup(outfile);
return 1;
}
outf << linestr << '\n';
}
return 0;
}
/**
 * Entry point: escapesrc infile.cpp outfile.cpp
 * @return the result of convert(), or 1 after printing the usage text when
 *         the argument count is wrong
 */
int main(int argc, const char *argv[]) {
  prog = argv[0];

  if(argc == 3) {
    const std::string infile = argv[1];
    const std::string outfile = argv[2];
    return convert(infile, outfile);
  }

  usage();
  return 1;
}
#include "utf_impl.cpp"

View File

@ -0,0 +1,16 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
u"sa\u0127\u0127a";
u'\u6587';
u"\U000219F2";
u"sa\u0127\u0127a";
u'\u6587'; u"\U000219F2";
"\x20\xCC\x81";
"\xCC\x88\x20";
"\x73\x61\xC4\xA7\xC4\xA7\x61";
"\xE6\x96\x87";
"\xF0\xA1\xA7\xB2";
"\x73\x61\xC4\xA7\xC4\xA7\x61";

View File

@ -0,0 +1,5 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
// This is a source file with no changes needed in it.
// In fact, the only non-ASCII character is the comment line at top.

View File

@ -0,0 +1,16 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
u"saħħa";
u'';
u"𡧲";
u"saħħa";
u''; u"𡧲";
u8" \u0301";
u8"\u0308 ";
u8"saħħa";
u8"";
u8"𡧲";
u8"saħ\u0127a";

View File

@ -24,7 +24,7 @@ CLEANFILES = *~ $(DEPS)
## Target information
TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
CPPFLAGS += -I$(top_srcdir)/common -I$(srcdir)/../toolutil
CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(srcdir)/../toolutil
LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
OBJECTS = gennorm2.o n2builder.o

View File

@ -33,7 +33,7 @@ TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
# derb depends on icuio
@ICUIO_TRUE@DERB = $(BINDIR)/$(DERB_STUB_NAME)$(EXEEXT)
CPPFLAGS += -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(srcdir)/../toolutil -I$(top_srcdir)/io
CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(srcdir)/../toolutil -I$(top_srcdir)/io
CPPFLAGS += -DUNISTR_FROM_CHAR_EXPLICIT=explicit -DUNISTR_FROM_STRING_EXPLICIT=explicit
LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)

View File

@ -28,7 +28,7 @@ CLEANFILES = *~ $(DEPS) $(MAN_FILES)
## Target information
TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
CPPFLAGS += -I$(top_srcdir)/common -I$(srcdir)/../toolutil
CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(srcdir)/../toolutil
LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
OBJECTS = makeconv.o ucnvstat.o genmbcs.o gencnvex.o

View File

@ -32,7 +32,7 @@ endif
## Target information
TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
CPPFLAGS += -I$(top_srcdir)/common -I$(srcdir)/../toolutil
CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(srcdir)/../toolutil
DEFS += -DUDATA_SO_SUFFIX=\".$(SO)\" -DSTATIC_O=\"$(STATIC_O)\"
LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)

View File

@ -45,7 +45,7 @@ DYNAMICCXXFLAGS = $(SHAREDLIBCXXFLAGS)
CFLAGS += $(LIBCFLAGS)
CXXFLAGS += $(LIBCXXFLAGS)
CPPFLAGS += -I$(top_srcdir)/common -I$(top_srcdir)/i18n $(LIBCPPFLAGS)
CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n $(LIBCPPFLAGS)
# from icuinfo
CPPFLAGS+= "-DU_BUILD=\"@build@\"" "-DU_HOST=\"@host@\"" "-DU_CC=\"@CC@\"" "-DU_CXX=\"@CXX@\""