2006-04-01 12:43:03 +00:00
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
%% Name: mbconv.tex
|
|
|
|
%% Purpose: wxMBConv documentation
|
|
|
|
%% Author: Ove Kaaven, Vadim Zeitlin
|
|
|
|
%% Created: 2000-03-25
|
|
|
|
%% RCS-ID: $Id$
|
|
|
|
%% Copyright: (c) 2000 Ove Kaaven
|
|
|
|
%% (c) 2003-2006 Vadim Zeitlin
|
|
|
|
%% License: wxWindows license
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
|
|
|
|
\section{\class{wxMBConv}}\label{wxmbconv}
|
|
|
|
|
|
|
|
This class is the base class of a hierarchy of classes capable of converting
|
2006-04-01 12:43:03 +00:00
|
|
|
text strings between multibyte (SBCS or DBCS) encodings and Unicode.
|
|
|
|
|
|
|
|
In the documentation for this and related classes please notice that
|
|
|
|
\emph{length} of the string refers to the number of characters in the string
|
|
|
|
not counting the terminating \NUL, if any, while the \emph{size} of the string
|
2006-04-03 22:16:22 +00:00
|
|
|
is the total number of bytes in the string, including any trailing \NUL.
|
2006-04-01 12:43:03 +00:00
|
|
|
Thus, the length of the wide character string \texttt{L"foo"} is $3$ while its size can
|
|
|
|
be either $8$ or $16$ depending on whether \texttt{wchar\_t} is $2$ bytes (as
|
|
|
|
under Windows) or $4$ (Unix).
|
|
|
|
|
|
|
|
\wxheading{Global variables}
|
|
|
|
|
|
|
|
There are several predefined instances of this class:
|
|
|
|
\begin{twocollist}
|
|
|
|
\twocolitem{\textbf{wxConvLibc}}{Uses the standard ANSI C \texttt{mbstowcs()} and
|
|
|
|
\texttt{wcstombs()} functions to perform the conversions; thus depends on the
|
|
|
|
current locale.}
|
2006-04-11 00:33:18 +00:00
|
|
|
\twocolitem{\textbf{wxConvLocal}}{Another conversion corresponding to the
|
|
|
|
current locale but this one uses the best available conversion.}
|
2006-04-11 00:36:24 +00:00
|
|
|
\twocolitem{\textbf{wxConvUI}}{The conversion used for the standard UI elements
|
|
|
|
such as menu items and buttons. This is a pointer which is initially set to
|
|
|
|
\texttt{wxConvLocal} as the program uses the current locale by default but can
|
|
|
|
be set to some specific conversion if the program needs to use a specific
|
|
|
|
encoding for its UI.}
|
2006-04-11 00:33:18 +00:00
|
|
|
\twocolitem{\textbf{wxConvISO8859\_1}}{Conversion to and from ISO-8859-1 (Latin I)
|
|
|
|
encoding.}
|
|
|
|
\twocolitem{\textbf{wxConvUTF8}}{Conversion to and from UTF-8 encoding.}
|
2006-04-01 12:43:03 +00:00
|
|
|
\twocolitem{\textbf{wxConvFile}}{The appropriate conversion for the file names,
|
|
|
|
depends on the system.}
|
2006-04-11 00:33:18 +00:00
|
|
|
% \twocolitem{\textbf{wxConvCurrent}}{Not really clear what is it for...}
|
2006-04-01 12:43:03 +00:00
|
|
|
\end{twocollist}
|
2000-07-15 19:51:35 +00:00
|
|
|
|
2006-04-04 12:35:21 +00:00
|
|
|
|
|
|
|
\wxheading{Constants}
|
|
|
|
|
|
|
|
The \texttt{wxCONV\_FAILED} value is defined as \texttt{(size\_t)$-1$} and is
|
|
|
|
returned by the conversion functions instead of the length of the converted
|
|
|
|
string if the conversion fails.
|
|
|
|
|
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
\wxheading{Derived from}
|
|
|
|
|
|
|
|
No base class
|
|
|
|
|
|
|
|
\wxheading{Include files}
|
|
|
|
|
|
|
|
<wx/strconv.h>
|
|
|
|
|
2007-07-28 12:21:03 +00:00
|
|
|
\wxheading{Library}
|
|
|
|
|
|
|
|
\helpref{wxBase}{librarieslist}
|
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
\wxheading{See also}
|
|
|
|
|
|
|
|
\helpref{wxCSConv}{wxcsconv},
|
|
|
|
\helpref{wxEncodingConverter}{wxencodingconverter},
|
|
|
|
\helpref{wxMBConv classes overview}{mbconvclasses}
|
|
|
|
|
2006-04-04 12:35:21 +00:00
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
\latexignore{\rtfignore{\wxheading{Members}}}
|
|
|
|
|
|
|
|
|
|
|
|
\membersection{wxMBConv::wxMBConv}\label{wxmbconvwxmbconv}
|
|
|
|
|
|
|
|
\func{}{wxMBConv}{\void}
|
|
|
|
|
2006-04-04 12:35:21 +00:00
|
|
|
Trivial default constructor.
|
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
|
2007-08-26 20:47:54 +00:00
|
|
|
\membersection{wxMBConv::Clone}\label{wxmbconvclone}
|
|
|
|
|
|
|
|
\constfunc{virtual wxMBConv *}{Clone}{\void}
|
|
|
|
|
|
|
|
This pure virtual function is overridden in each of the derived classes to
|
|
|
|
return a new copy of the object it is called on. It is used for copying the
|
|
|
|
conversion objects while preserving their dynamic type.
|
|
|
|
|
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
\membersection{wxMBConv::MB2WC}\label{wxmbconvmb2wc}
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
\constfunc{virtual size\_t}{MB2WC}{\param{wchar\_t *}{out}, \param{const char *}{in}, \param{size\_t }{outLen}}
|
|
|
|
|
2006-04-04 12:35:21 +00:00
|
|
|
\deprecated{\helpref{ToWChar}{wxmbconvtowchar}}
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
Converts from a string \arg{in} in multibyte encoding to Unicode putting up to
|
|
|
|
\arg{outLen} characters into the buffer \arg{out}.
|
2000-07-15 19:51:35 +00:00
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
If \arg{out} is \NULL, only the length of the string which would result from
|
|
|
|
the conversion is calculated and returned. Note that this is the length and not
|
|
|
|
size, i.e. the returned value does \emph{not} include the trailing \NUL. But
|
|
|
|
when the function is called with a non-\NULL \arg{out} buffer, the \arg{outLen}
|
|
|
|
parameter should be one more than the string length so that the string can be properly \NUL-terminated.
|
2002-12-04 14:11:26 +00:00
|
|
|
|
|
|
|
\wxheading{Parameters}
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
\docparam{out}{The output buffer, may be \NULL if the caller is only
|
2002-12-04 14:11:26 +00:00
|
|
|
interested in the length of the resulting string}
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
\docparam{in}{The \NUL-terminated input string, cannot be \NULL}
|
2002-12-04 14:11:26 +00:00
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
\docparam{outLen}{The length of the output buffer, \emph{including} the terminating
|
|
|
|
\NUL, ignored if \arg{out} is \NULL}
|
2002-12-04 14:11:26 +00:00
|
|
|
|
|
|
|
\wxheading{Return value}
|
|
|
|
|
2006-04-03 22:16:22 +00:00
|
|
|
The length of the converted string \emph{excluding} the trailing \NUL.
|
2006-04-01 12:43:03 +00:00
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
|
|
|
|
\membersection{wxMBConv::WC2MB}\label{wxmbconvwc2mb}
|
|
|
|
|
|
|
|
\constfunc{virtual size\_t}{WC2MB}{\param{char* }{buf}, \param{const wchar\_t* }{psz}, \param{size\_t }{n}}
|
|
|
|
|
2006-04-04 12:35:21 +00:00
|
|
|
\deprecated{\helpref{FromWChar}{wxmbconvfromwchar}}
|
|
|
|
|
2002-12-04 14:11:26 +00:00
|
|
|
Converts from Unicode to multibyte encoding. The semantics of this function
|
|
|
|
(including the return value meaning) is the same as for
|
|
|
|
\helpref{MB2WC}{wxmbconvmb2wc}.
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
Notice that when the function is called with a non-\NULL buffer, the
|
|
|
|
\arg{n} parameter should be the size of the buffer and so it \emph{should} take
|
2006-04-03 22:16:22 +00:00
|
|
|
into account the trailing \NUL, which might take two or four bytes for some
|
2006-04-01 12:43:03 +00:00
|
|
|
encodings (UTF-16 and UTF-32) and not one.
|
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
|
|
|
|
\membersection{wxMBConv::cMB2WC}\label{wxmbconvcmb2wc}
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
\constfunc{const wxWCharBuffer}{cMB2WC}{\param{const char *}{in}}
|
|
|
|
|
|
|
|
\constfunc{const wxWCharBuffer}{cMB2WC}{\param{const char *}{in}, \param{size\_t }{inLen}, \param{size\_t }{*outLen}}
|
|
|
|
|
|
|
|
Converts from multibyte encoding to Unicode by calling
|
|
|
|
\helpref{MB2WC}{wxmbconvmb2wc}, allocating a temporary wxWCharBuffer to hold
|
|
|
|
the result.
|
|
|
|
|
|
|
|
The first overload takes a \NUL-terminated input string. The second one takes a
|
|
|
|
string of exactly the specified length and the string may or may not include the
|
2006-04-03 22:16:22 +00:00
|
|
|
trailing \NUL character(s). If the string is not \NUL-terminated, a temporary
|
2006-04-01 12:43:03 +00:00
|
|
|
\NUL-terminated copy of it suitable for passing to \helpref{MB2WC}{wxmbconvmb2wc}
|
|
|
|
is made, so it is more efficient to ensure that the string does have the
|
|
|
|
appropriate number of \NUL bytes (which is usually $1$ but may be $2$ or $4$
|
2006-04-04 07:49:08 +00:00
|
|
|
for UTF-16 or UTF-32, see \helpref{GetMBNulLen}{wxmbconvgetmbnullen}),
|
|
|
|
especially for long strings.
|
2006-04-01 12:43:03 +00:00
|
|
|
|
|
|
|
If \arg{outLen} is not \NULL, it receives the length of the converted
|
|
|
|
string.
|
2000-07-15 19:51:35 +00:00
|
|
|
|
|
|
|
|
|
|
|
\membersection{wxMBConv::cWC2MB}\label{wxmbconvcwc2mb}
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
\constfunc{const wxCharBuffer}{cWC2MB}{\param{const wchar\_t* }{in}}
|
|
|
|
|
|
|
|
\constfunc{const wxCharBuffer}{cWC2MB}{\param{const wchar\_t* }{in}, \param{size\_t }{inLen}, \param{size\_t }{*outLen}}
|
2000-07-15 19:51:35 +00:00
|
|
|
|
|
|
|
Converts from Unicode to multibyte encoding by calling WC2MB,
|
|
|
|
allocating a temporary wxCharBuffer to hold the result.
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
The second overload of this function allows converting a string of the given
|
|
|
|
length \arg{inLen}, whether it is \NUL-terminated or not (for wide character
|
|
|
|
strings, unlike for the multibyte ones, a single \NUL is always enough).
|
|
|
|
But notice that just as with \helpref{cMB2WC}{wxmbconvcmb2wc}, it is more
|
|
|
|
efficient to pass an already terminated string to this function as otherwise a
|
|
|
|
copy is made internally.
|
|
|
|
|
|
|
|
If \arg{outLen} is not \NULL, it receives the length of the converted
|
|
|
|
string.
|
|
|
|
|
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
\membersection{wxMBConv::cMB2WX}\label{wxmbconvcmb2wx}
|
|
|
|
|
|
|
|
\constfunc{const char*}{cMB2WX}{\param{const char* }{psz}}
|
|
|
|
|
|
|
|
\constfunc{const wxWCharBuffer}{cMB2WX}{\param{const char* }{psz}}
|
|
|
|
|
|
|
|
Converts from multibyte encoding to the current wxChar type
|
|
|
|
(which depends on whether wxUSE\_UNICODE is set to 1). If wxChar is char,
|
|
|
|
it returns the parameter unaltered. If wxChar is wchar\_t, it returns the
|
|
|
|
result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct
|
|
|
|
return type (without const).
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
\membersection{wxMBConv::cWX2MB}\label{wxmbconvcwx2mb}
|
|
|
|
|
|
|
|
\constfunc{const char*}{cWX2MB}{\param{const wxChar* }{psz}}
|
|
|
|
|
|
|
|
\constfunc{const wxCharBuffer}{cWX2MB}{\param{const wxChar* }{psz}}
|
|
|
|
|
|
|
|
Converts from the current wxChar type to multibyte encoding. If wxChar is char,
|
|
|
|
it returns the parameter unaltered. If wxChar is wchar\_t, it returns the
|
|
|
|
result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct
|
|
|
|
return type (without const).
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
\membersection{wxMBConv::cWC2WX}\label{wxmbconvcwc2wx}
|
|
|
|
|
|
|
|
\constfunc{const wchar\_t*}{cWC2WX}{\param{const wchar\_t* }{psz}}
|
|
|
|
|
|
|
|
\constfunc{const wxCharBuffer}{cWC2WX}{\param{const wchar\_t* }{psz}}
|
|
|
|
|
|
|
|
Converts from Unicode to the current wxChar type. If wxChar is wchar\_t,
|
|
|
|
it returns the parameter unaltered. If wxChar is char, it returns the
|
|
|
|
result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct
|
|
|
|
return type (without const).
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
\membersection{wxMBConv::cWX2WC}\label{wxmbconvcwx2wc}
|
|
|
|
|
|
|
|
\constfunc{const wchar\_t*}{cWX2WC}{\param{const wxChar* }{psz}}
|
|
|
|
|
|
|
|
\constfunc{const wxWCharBuffer}{cWX2WC}{\param{const wxChar* }{psz}}
|
|
|
|
|
|
|
|
Converts from the current wxChar type to Unicode. If wxChar is wchar\_t,
|
|
|
|
it returns the parameter unaltered. If wxChar is char, it returns the
|
|
|
|
result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
|
|
|
|
return type (without const).
|
|
|
|
|
2006-04-04 07:49:08 +00:00
|
|
|
|
2006-04-04 12:35:21 +00:00
|
|
|
\membersection{wxMBConv::FromWChar}\label{wxmbconvfromwchar}
|
|
|
|
|
2007-05-02 13:00:28 +00:00
|
|
|
\constfunc{virtual size\_t}{FromWChar}{\param{char *}{dst}, \param{size\_t }{dstLen}, \param{const wchar\_t *}{src}, \param{size\_t }{srcLen = wxNO\_LEN}}
|
2006-04-04 12:35:21 +00:00
|
|
|
|
2007-05-02 13:00:28 +00:00
|
|
|
This function has the same semantics as \helpref{ToWChar}{wxmbconvtowchar}
|
|
|
|
except that it converts a wide string to multibyte one.
|
2006-04-04 12:35:21 +00:00
|
|
|
|
|
|
|
\membersection{wxMBConv::GetMaxMBNulLen}\label{wxmbconvgetmaxmbnullen}
|
|
|
|
|
|
|
|
\func{const size\_t}{GetMaxMBNulLen}{\void}
|
|
|
|
|
|
|
|
Returns the maximal value which can be returned by
|
|
|
|
\helpref{GetMBNulLen}{wxmbconvgetmbnullen} for any conversion object. Currently
|
|
|
|
this value is $4$.
|
|
|
|
|
|
|
|
This method can be used to allocate the buffer with enough space for the
|
|
|
|
trailing \NUL characters for any encoding.
|
|
|
|
|
|
|
|
|
2006-04-04 07:49:08 +00:00
|
|
|
\membersection{wxMBConv::GetMBNulLen}\label{wxmbconvgetmbnullen}
|
|
|
|
|
|
|
|
\constfunc{size\_t}{GetMBNulLen}{\void}
|
|
|
|
|
|
|
|
This function returns $1$ for most of the multibyte encodings in which the
|
|
|
|
string is terminated by a single \NUL, $2$ for UTF-16 and $4$ for UTF-32 for
|
|
|
|
which the string is terminated with $2$ and $4$ \NUL characters respectively.
|
2007-01-11 16:41:43 +00:00
|
|
|
The other cases are not currently supported and \texttt{wxCONV\_FAILED}
|
|
|
|
(defined as \texttt{(size\_t)$-1$}) is returned for them.
|
2006-04-04 07:49:08 +00:00
|
|
|
|
|
|
|
|
2006-04-04 12:35:21 +00:00
|
|
|
\membersection{wxMBConv::ToWChar}\label{wxmbconvtowchar}
|
|
|
|
|
2007-05-02 13:00:28 +00:00
|
|
|
\constfunc{virtual size\_t}{ToWChar}{\param{wchar\_t *}{dst}, \param{size\_t }{dstLen}, \param{const char *}{src}, \param{size\_t }{srcLen = wxNO\_LEN}}
|
2006-04-04 12:35:21 +00:00
|
|
|
|
2007-05-02 13:00:28 +00:00
|
|
|
The most general function for converting a multibyte string to a wide string.
|
|
|
|
The main case is when \arg{dst} is not \NULL and \arg{srcLen} is not
|
|
|
|
\texttt{wxNO\_LEN} (which is defined as \texttt{(size\_t)$-1$}): then
|
|
|
|
the function converts exactly \arg{srcLen} bytes starting at \arg{src} into
|
|
|
|
a wide string which it outputs to \arg{dst}. If the length of the resulting wide
|
|
|
|
string is greater than \arg{dstLen}, an error is returned. Note that if
|
|
|
|
\arg{srcLen} bytes don't include \NUL characters, the resulting wide string is
|
|
|
|
not \NUL-terminated either.
|
2006-04-04 12:35:21 +00:00
|
|
|
|
2007-05-02 13:00:28 +00:00
|
|
|
If \arg{srcLen} is \texttt{wxNO\_LEN}, the function supposes that the string is
|
|
|
|
properly (i.e. as necessary for the encoding handled by this conversion)
|
|
|
|
\NUL-terminated and converts the entire string, including any trailing \NUL
|
|
|
|
bytes. In this case the wide string is also \NUL-terminated.
|
|
|
|
|
|
|
|
Finally, if \arg{dst} is \NULL, the function returns the length of the needed
|
|
|
|
buffer.
|
|
|
|
|
|
|
|
\wxheading{Return value}
|
|
|
|
|
|
|
|
The number of characters written to \arg{dst} (or the number of characters
|
|
|
|
which would have been written to it if it were non-\NULL) on success or
|
|
|
|
\texttt{wxCONV\_FAILED} on error.
|
2006-04-04 12:35:21 +00:00
|
|
|
|
2007-05-02 13:01:12 +00:00
|
|
|
|