Disable the use of UTF-8 by default in Unix builds.
Add up-to-date description of UTF-8 support to the Unicode overview. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@71424 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
parent
458f68d0a7
commit
bf0f2c4b33
16
configure
vendored
16
configure
vendored
@ -3038,17 +3038,16 @@ DEFAULT_wxUSE_LIBSDL=no
|
|||||||
DEFAULT_wxUSE_ACCESSIBILITY=no
|
DEFAULT_wxUSE_ACCESSIBILITY=no
|
||||||
DEFAULT_wxUSE_IPV6=no
|
DEFAULT_wxUSE_IPV6=no
|
||||||
DEFAULT_wxUSE_GSTREAMER8=no
|
DEFAULT_wxUSE_GSTREAMER8=no
|
||||||
|
DEFAULT_wxUSE_UNICODE_UTF8=no
|
||||||
|
DEFAULT_wxUSE_UNICODE_UTF8_LOCALE=no
|
||||||
|
|
||||||
DEFAULT_wxUSE_ARTPROVIDER_TANGO=auto
|
DEFAULT_wxUSE_ARTPROVIDER_TANGO=auto
|
||||||
DEFAULT_wxUSE_UNICODE_UTF8=auto
|
|
||||||
DEFAULT_wxUSE_OPENGL=auto
|
DEFAULT_wxUSE_OPENGL=auto
|
||||||
DEFAULT_wxUSE_MEDIACTRL=auto
|
DEFAULT_wxUSE_MEDIACTRL=auto
|
||||||
DEFAULT_wxUSE_COMPILER_TLS=auto
|
DEFAULT_wxUSE_COMPILER_TLS=auto
|
||||||
DEFAULT_wxUSE_HOTKEY=auto
|
DEFAULT_wxUSE_HOTKEY=auto
|
||||||
DEFAULT_wxUSE_METAFILE=auto
|
DEFAULT_wxUSE_METAFILE=auto
|
||||||
|
|
||||||
DEFAULT_wxUSE_UNICODE_UTF8_LOCALE=no
|
|
||||||
|
|
||||||
DEFAULT_wxUSE_UNIVERSAL_BINARY=no
|
DEFAULT_wxUSE_UNIVERSAL_BINARY=no
|
||||||
DEFAULT_wxUSE_MAC_ARCH=no
|
DEFAULT_wxUSE_MAC_ARCH=no
|
||||||
|
|
||||||
@ -34345,17 +34344,6 @@ fi
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
if test "$wxUSE_UNICODE" = "yes" -a "$wxUSE_UNICODE_UTF8" = "auto" ; then
|
|
||||||
if test "$USE_UNIX" = 1 -a "$wxUSE_DARWIN" != 1 ; then
|
|
||||||
wxUSE_UNICODE_UTF8=yes
|
|
||||||
elif test "$USE_OS2" = 1 ; then
|
|
||||||
wxUSE_UNICODE_UTF8=yes
|
|
||||||
else
|
|
||||||
wxUSE_UNICODE_UTF8=no
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
|
||||||
if test "$wxUSE_GUI" = "yes"; then
|
if test "$wxUSE_GUI" = "yes"; then
|
||||||
if test "$wxUSE_UNIX" = "yes" -a "$wxUSE_PM" != 1; then
|
if test "$wxUSE_UNIX" = "yes" -a "$wxUSE_PM" != 1; then
|
||||||
|
|
||||||
|
24
configure.in
24
configure.in
@ -385,18 +385,17 @@ dnl features disabled by default
|
|||||||
DEFAULT_wxUSE_ACCESSIBILITY=no
|
DEFAULT_wxUSE_ACCESSIBILITY=no
|
||||||
DEFAULT_wxUSE_IPV6=no
|
DEFAULT_wxUSE_IPV6=no
|
||||||
DEFAULT_wxUSE_GSTREAMER8=no
|
DEFAULT_wxUSE_GSTREAMER8=no
|
||||||
|
DEFAULT_wxUSE_UNICODE_UTF8=no
|
||||||
|
DEFAULT_wxUSE_UNICODE_UTF8_LOCALE=no
|
||||||
|
|
||||||
dnl automatic features
|
dnl automatic features
|
||||||
DEFAULT_wxUSE_ARTPROVIDER_TANGO=auto
|
DEFAULT_wxUSE_ARTPROVIDER_TANGO=auto
|
||||||
DEFAULT_wxUSE_UNICODE_UTF8=auto
|
|
||||||
DEFAULT_wxUSE_OPENGL=auto
|
DEFAULT_wxUSE_OPENGL=auto
|
||||||
DEFAULT_wxUSE_MEDIACTRL=auto
|
DEFAULT_wxUSE_MEDIACTRL=auto
|
||||||
DEFAULT_wxUSE_COMPILER_TLS=auto
|
DEFAULT_wxUSE_COMPILER_TLS=auto
|
||||||
DEFAULT_wxUSE_HOTKEY=auto
|
DEFAULT_wxUSE_HOTKEY=auto
|
||||||
DEFAULT_wxUSE_METAFILE=auto
|
DEFAULT_wxUSE_METAFILE=auto
|
||||||
|
|
||||||
DEFAULT_wxUSE_UNICODE_UTF8_LOCALE=no
|
|
||||||
|
|
||||||
dnl Mac/Cocoa users need to enable building universal binaries explicitly
|
dnl Mac/Cocoa users need to enable building universal binaries explicitly
|
||||||
DEFAULT_wxUSE_UNIVERSAL_BINARY=no
|
DEFAULT_wxUSE_UNIVERSAL_BINARY=no
|
||||||
DEFAULT_wxUSE_MAC_ARCH=no
|
DEFAULT_wxUSE_MAC_ARCH=no
|
||||||
@ -3610,25 +3609,6 @@ else
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
dnl ---------------------------------------------------------------------------
|
|
||||||
dnl UTF-8 support
|
|
||||||
dnl ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
dnl If UTF-8 support wasn't explicitly enabled or disabled, enable it only
|
|
||||||
dnl for ports where it makes sense by default (GTK+, DirectFB):
|
|
||||||
if test "$wxUSE_UNICODE" = "yes" -a "$wxUSE_UNICODE_UTF8" = "auto" ; then
|
|
||||||
if test "$USE_UNIX" = 1 -a "$wxUSE_DARWIN" != 1 ; then
|
|
||||||
wxUSE_UNICODE_UTF8=yes
|
|
||||||
elif test "$USE_OS2" = 1 ; then
|
|
||||||
dnl wide char support is quite incomplete in libc;
|
|
||||||
dnl UTF-8 might actually work when evaluating/setting
|
|
||||||
dnl code pages correctly, even for ports other than GTK20.
|
|
||||||
wxUSE_UNICODE_UTF8=yes
|
|
||||||
else
|
|
||||||
wxUSE_UNICODE_UTF8=no
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
dnl ---------------------------------------------------------------------------
|
dnl ---------------------------------------------------------------------------
|
||||||
dnl Optional libraries included when system library is not used
|
dnl Optional libraries included when system library is not used
|
||||||
dnl ---------------------------------------------------------------------------
|
dnl ---------------------------------------------------------------------------
|
||||||
|
@ -126,11 +126,14 @@ terms reported above.
|
|||||||
|
|
||||||
@section overview_unicode_supportin Unicode Support in wxWidgets
|
@section overview_unicode_supportin Unicode Support in wxWidgets
|
||||||
|
|
||||||
Since wxWidgets 3.0 Unicode support is always enabled and building the library
|
@subsection overview_unicode_support_default Unicode is Always Used by Default
|
||||||
without it is not recommended any longer and will cease to be supported in the
|
|
||||||
near future. This means that internally only Unicode strings are used and that,
|
Since wxWidgets 3.0 Unicode support is always enabled and while building the
|
||||||
under Microsoft Windows, Unicode system API is used which means that wxWidgets
|
library without it is still possible, it is not recommended any longer and will
|
||||||
programs require the Microsoft Layer for Unicode to run on Windows 95/98/ME.
|
cease to be supported in the near future. This means that internally only
|
||||||
|
Unicode strings are used and that, under Microsoft Windows, Unicode system API
|
||||||
|
is used which means that wxWidgets programs require the Microsoft Layer for
|
||||||
|
Unicode to run on Windows 95/98/ME.
|
||||||
|
|
||||||
However, unlike the Unicode build mode of the previous versions of wxWidgets, this
|
However, unlike the Unicode build mode of the previous versions of wxWidgets, this
|
||||||
support is mostly transparent: you can still continue to work with the @b narrow
|
support is mostly transparent: you can still continue to work with the @b narrow
|
||||||
@ -181,6 +184,54 @@ in your program there is really nothing special to do. However you should be
|
|||||||
aware of the potential problems covered by the following section.
|
aware of the potential problems covered by the following section.
|
||||||
|
|
||||||
|
|
||||||
|
@subsection overview_unicode_support_utf Choosing Unicode Representation
|
||||||
|
|
||||||
|
wxWidgets uses the system @c wchar_t in wxString implementation by default
|
||||||
|
under all systems. Thus, under Microsoft Windows, UCS-2 (a simplified version of
|
||||||
|
UTF-16 without support for surrogate characters) is used as @c wchar_t is 2
|
||||||
|
bytes on this platform. Under Unix systems, including Mac OS X, UCS-4 (also
|
||||||
|
known as UTF-32) is used by default, however it is also possible to build
|
||||||
|
wxWidgets to use UTF-8 internally by passing @c --enable-utf8 option to
|
||||||
|
configure.
|
||||||
|
|
||||||
|
The interface provided by wxString is the same independently of the format used
|
||||||
|
internally. However different formats have specific advantages and
|
||||||
|
disadvantages. Notably, under Unix, the underlying graphical toolkit (e.g.
|
||||||
|
GTK+) usually uses UTF-8 encoded strings and using the same representation for
|
||||||
|
the strings in wxWidgets avoids conversion from UTF-32 to UTF-8 and
|
||||||
|
vice versa each time a string is shown in the UI or retrieved from it. The
|
||||||
|
overhead of such conversions is usually negligible for small strings but may be
|
||||||
|
important for some programs. If you believe that it would be advantageous to
|
||||||
|
use UTF-8 for the strings in your particular application, you may rebuild
|
||||||
|
wxWidgets to use UTF-8 as explained above (notice that this is currently not
|
||||||
|
supported under Microsoft Windows and arguably doesn't make much sense there as
|
||||||
|
Windows itself uses UTF-16 and not UTF-8) but be sure to be aware of the
|
||||||
|
performance implications (see @ref overview_unicode_performance) of using UTF-8
|
||||||
|
in wxString before doing this!
|
||||||
|
|
||||||
|
Generally speaking, you should only use the non-default UTF-8 build in specific
|
||||||
|
circumstances, e.g. when building for resource-constrained systems where the overhead
|
||||||
|
of conversions (and also reduced memory usage of UTF-8 compared to UTF-32 for
|
||||||
|
the European languages) can be important. If the environment in which your
|
||||||
|
program is running is under your control -- as is quite often the case in such
|
||||||
|
scenarios -- consider ensuring that the system always uses a UTF-8 locale and
|
||||||
|
use the @c --enable-utf8only configure option to disable support for the other
|
||||||
|
locales and consider all strings to be in UTF-8. This further reduces the code
|
||||||
|
size and removes the need for conversions in more cases.
|
||||||
|
|
||||||
|
|
||||||
|
@subsection overview_unicode_settings Unicode Related Preprocessor Symbols
|
||||||
|
|
||||||
|
@c wxUSE_UNICODE is defined as 1 now to indicate Unicode support. It can be
|
||||||
|
explicitly set to 0 in @c setup.h under MSW or you can use @c --disable-unicode
|
||||||
|
under Unix but doing this is strongly discouraged. By default, @c
|
||||||
|
wxUSE_UNICODE_WCHAR is also defined as 1, however in the UTF-8 build (described in
|
||||||
|
the previous section), it is set to 0 and @c wxUSE_UNICODE_UTF8, which is
|
||||||
|
usually 0, is set to 1 instead. In the latter case, @c wxUSE_UTF8_LOCALE_ONLY
|
||||||
|
can also be set to 1 to indicate that all strings are considered to be in UTF-8.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@section overview_unicode_pitfalls Potential Unicode Pitfalls
|
@section overview_unicode_pitfalls Potential Unicode Pitfalls
|
||||||
|
|
||||||
The problems can be separated into three broad classes:
|
The problems can be separated into three broad classes:
|
||||||
@ -280,17 +331,18 @@ wxWidgets 3.0 and the new code should be used with this in mind and ideally
|
|||||||
avoiding implicit conversions to @c char*.
|
avoiding implicit conversions to @c char*.
|
||||||
|
|
||||||
|
|
||||||
@subsection overview_unicode_performance Unicode Performance Implications
|
@subsection overview_unicode_performance Performance Implications of Using UTF-8
|
||||||
|
|
||||||
Under Unix systems wxString class uses variable-width UTF-8 encoding for
|
As mentioned above, under Unix systems wxString class can use variable-width
|
||||||
internal representation and this implies that it can't guarantee constant-time
|
UTF-8 encoding for internal representation. In this case it can't guarantee
|
||||||
access to N-th element of the string any longer as to find the position of this
|
constant-time access to N-th element of the string any longer as to find the
|
||||||
character in the string we have to examine all the preceding ones. Usually this
|
position of this character in the string we have to examine all the preceding
|
||||||
doesn't matter much because most algorithms used on the strings examine them
|
ones. Usually this doesn't matter much because most algorithms used on the
|
||||||
sequentially anyhow and because wxString implements a cache for iterating over
|
strings examine them sequentially anyhow and because wxString implements a
|
||||||
the string by index but it can have serious consequences for algorithms
|
cache for iterating over the string by index but it can have serious
|
||||||
using random access to string elements as they typically acquire O(N^2) time
|
consequences for algorithms using random access to string elements as they
|
||||||
complexity instead of O(N) where N is the length of the string.
|
typically acquire O(N^2) time complexity instead of O(N) where N is the length
|
||||||
|
of the string.
|
||||||
|
|
||||||
Even despite caching the index, indexed access should be replaced with
|
Even despite caching the index, indexed access should be replaced with
|
||||||
sequential access using string iterators. For example a typical loop:
|
sequential access using string iterators. For example a typical loop:
|
||||||
@ -384,15 +436,5 @@ But, once again, none of these cryptic types is really needed if you just pass
|
|||||||
the return value of any of the functions mentioned in this section to another
|
the return value of any of the functions mentioned in this section to another
|
||||||
function directly.
|
function directly.
|
||||||
|
|
||||||
@section overview_unicode_settings Unicode Related Compilation Settings
|
|
||||||
|
|
||||||
@c wxUSE_UNICODE is now defined as @c 1 by default to indicate Unicode support.
|
|
||||||
If UTF-8 is used for the internal storage in wxString, @c wxUSE_UNICODE_UTF8 is
|
|
||||||
also defined, otherwise @c wxUSE_UNICODE_WCHAR is.
|
|
||||||
|
|
||||||
You are encouraged to always use the default build settings of wxWidgets; this avoids
|
|
||||||
the need of different builds of the same application/library because of different
|
|
||||||
"build modes".
|
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user