2006-04-01 12:43:03 +00:00
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
%% Name: mbconv.tex
|
|
|
|
%% Purpose: wxMBConv documentation
|
|
|
|
%% Author: Ove Kaaven, Vadim Zeitlin
|
|
|
|
%% Created: 2000-03-25
|
|
|
|
%% RCS-ID: $Id$
|
|
|
|
%% Copyright: (c) 2000 Ove Kaaven
|
|
|
|
%% (c) 2003-2006 Vadim Zeitlin
|
|
|
|
%% License: wxWindows license
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
|
|
|
|
\section{\class{wxMBConv}}\label{wxmbconv}
|
|
|
|
|
|
|
|
This class is the base class of a hierarchy of classes capable of converting
|
2006-04-01 12:43:03 +00:00
|
|
|
text strings between multibyte (SBCS or DBCS) encodings and Unicode.
|
|
|
|
|
|
|
|
In the documentation for this and related classes please notice that
|
|
|
|
\emph{length} of the string refers to the number of characters in the string
|
|
|
|
not counting the terminating \NUL, if any, while the \emph{size} of the string
|
2006-04-03 22:16:22 +00:00
|
|
|
is the total number of bytes in the string, including any trailing \NUL.
|
2006-04-01 12:43:03 +00:00
|
|
|
Thus, the length of the wide character string \texttt{L"foo"} is $3$ while its size can
|
|
|
|
be either $8$ or $16$ depending on whether \texttt{wchar\_t} is $2$ bytes (as
|
|
|
|
under Windows) or $4$ (Unix).
|
|
|
|
|
|
|
|
\wxheading{Global variables}
|
|
|
|
|
|
|
|
There are several predefined instances of this class:
|
|
|
|
\begin{twocollist}
|
|
|
|
\twocolitem{\textbf{wxConvLibc}}{Uses the standard ANSI C \texttt{mbstowcs()} and
|
|
|
|
\texttt{wcstombs()} functions to perform the conversions; thus depends on the
|
|
|
|
current locale.}
|
|
|
|
\twocolitem{\textbf{wxConvFile}}{The conversion appropriate for file names, which
|
|
|
|
depends on the system.}
|
|
|
|
\end{twocollist}
|
2000-07-15 19:51:35 +00:00
|
|
|
|
|
|
|
\wxheading{Derived from}
|
|
|
|
|
|
|
|
No base class
|
|
|
|
|
|
|
|
\wxheading{Include files}
|
|
|
|
|
|
|
|
<wx/strconv.h>
|
|
|
|
|
|
|
|
\wxheading{See also}
|
|
|
|
|
|
|
|
\helpref{wxCSConv}{wxcsconv},
|
|
|
|
\helpref{wxEncodingConverter}{wxencodingconverter},
|
|
|
|
\helpref{wxMBConv classes overview}{mbconvclasses}
|
|
|
|
|
|
|
|
\latexignore{\rtfignore{\wxheading{Members}}}
|
|
|
|
|
|
|
|
|
|
|
|
\membersection{wxMBConv::wxMBConv}\label{wxmbconvwxmbconv}
|
|
|
|
|
|
|
|
\func{}{wxMBConv}{\void}
|
|
|
|
|
|
|
|
Constructor.
|
|
|
|
|
|
|
|
\membersection{wxMBConv::MB2WC}\label{wxmbconvmb2wc}
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
\constfunc{virtual size\_t}{MB2WC}{\param{wchar\_t *}{out}, \param{const char *}{in}, \param{size\_t }{outLen}}
|
|
|
|
|
|
|
|
Converts from a string \arg{in} in multibyte encoding to Unicode putting up to
|
|
|
|
\arg{outLen} characters into the buffer \arg{out}.
|
2000-07-15 19:51:35 +00:00
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
If \arg{out} is \NULL, only the length of the string which would result from
|
|
|
|
the conversion is calculated and returned. Note that this is the length and not
|
|
|
|
size, i.e. the returned value does \emph{not} include the trailing \NUL. But
|
|
|
|
when the function is called with a non-\NULL \arg{out} buffer, the \arg{outLen}
|
|
|
|
parameter should be one more so that the string can be properly \NUL-terminated.
|
2002-12-04 14:11:26 +00:00
|
|
|
|
|
|
|
\wxheading{Parameters}
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
\docparam{out}{The output buffer, may be \NULL if the caller is only
|
2002-12-04 14:11:26 +00:00
|
|
|
interested in the length of the resulting string}
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
\docparam{in}{The \NUL-terminated input string, cannot be \NULL}
|
2002-12-04 14:11:26 +00:00
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
\docparam{outLen}{The length of the output buffer, \emph{including} the terminating
|
|
|
|
\NUL, ignored if \arg{out} is \NULL}
|
2002-12-04 14:11:26 +00:00
|
|
|
|
|
|
|
\wxheading{Return value}
|
|
|
|
|
2006-04-03 22:16:22 +00:00
|
|
|
The length of the converted string \emph{excluding} the trailing \NUL.
|
2006-04-01 12:43:03 +00:00
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
|
|
|
|
\membersection{wxMBConv::WC2MB}\label{wxmbconvwc2mb}
|
|
|
|
|
|
|
|
\constfunc{virtual size\_t}{WC2MB}{\param{char* }{buf}, \param{const wchar\_t* }{psz}, \param{size\_t }{n}}
|
|
|
|
|
2002-12-04 14:11:26 +00:00
|
|
|
Converts from Unicode to multibyte encoding. The semantics of this function
|
|
|
|
(including the return value meaning) is the same as for
|
|
|
|
\helpref{MB2WC}{wxmbconvmb2wc}.
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
Notice that when the function is called with a non-\NULL buffer, the
|
|
|
|
\arg{n} parameter should be the size of the buffer and so it \emph{should} take
|
2006-04-03 22:16:22 +00:00
|
|
|
into account the trailing \NUL, which might take two or four bytes for some
|
2006-04-01 12:43:03 +00:00
|
|
|
encodings (UTF-16 and UTF-32) and not one.
|
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
|
|
|
|
\membersection{wxMBConv::cMB2WC}\label{wxmbconvcmb2wc}
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
\constfunc{const wxWCharBuffer}{cMB2WC}{\param{const char *}{in}}
|
|
|
|
|
|
|
|
\constfunc{const wxWCharBuffer}{cMB2WC}{\param{const char *}{in}, \param{size\_t }{inLen}, \param{size\_t *}{outLen}}
|
|
|
|
|
|
|
|
Converts from multibyte encoding to Unicode by calling
|
|
|
|
\helpref{MB2WC}{wxmbconvmb2wc}, allocating a temporary wxWCharBuffer to hold
|
|
|
|
the result.
|
|
|
|
|
|
|
|
The first overload takes a \NUL-terminated input string. The second one takes a
|
|
|
|
string of exactly the specified length and the string may or may not include the
|
2006-04-03 22:16:22 +00:00
|
|
|
trailing \NUL character(s). If the string is not \NUL-terminated, a temporary
|
2006-04-01 12:43:03 +00:00
|
|
|
\NUL-terminated copy of it suitable for passing to \helpref{MB2WC}{wxmbconvmb2wc}
|
|
|
|
is made, so it is more efficient to ensure that the string does have the
|
|
|
|
appropriate number of \NUL bytes (which is usually $1$ but may be $2$ or $4$
|
2006-04-04 07:49:08 +00:00
|
|
|
for UTF-16 or UTF-32, see \helpref{GetMBNulLen}{wxmbconvgetmbnullen}),
|
|
|
|
especially for long strings.
|
2006-04-01 12:43:03 +00:00
|
|
|
|
|
|
|
If \arg{outLen} is non-\NULL, it receives the length of the converted
|
|
|
|
string.
|
2000-07-15 19:51:35 +00:00
|
|
|
|
|
|
|
|
|
|
|
\membersection{wxMBConv::cWC2MB}\label{wxmbconvcwc2mb}
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
\constfunc{const wxCharBuffer}{cWC2MB}{\param{const wchar\_t* }{in}}
|
|
|
|
|
|
|
|
\constfunc{const wxCharBuffer}{cWC2MB}{\param{const wchar\_t* }{in}, \param{size\_t }{inLen}, \param{size\_t *}{outLen}}
|
2000-07-15 19:51:35 +00:00
|
|
|
|
|
|
|
Converts from Unicode to multibyte encoding by calling \helpref{WC2MB}{wxmbconvwc2mb},
|
|
|
|
allocating a temporary wxCharBuffer to hold the result.
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
The second overload of this function allows converting a string of the given
|
|
|
|
length \arg{inLen}, whether it is \NUL-terminated or not (for wide character
|
|
|
|
strings, unlike for the multibyte ones, a single \NUL is always enough).
|
|
|
|
But notice that just as with \helpref{cMB2WC}{wxmbconvcmb2wc}, it is more
|
|
|
|
efficient to pass an already terminated string to this function as otherwise a
|
|
|
|
copy is made internally.
|
|
|
|
|
|
|
|
If \arg{outLen} is non-\NULL, it receives the length of the converted
|
|
|
|
string.
|
|
|
|
|
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
\membersection{wxMBConv::cMB2WX}\label{wxmbconvcmb2wx}
|
|
|
|
|
|
|
|
\constfunc{const char*}{cMB2WX}{\param{const char* }{psz}}
|
|
|
|
|
|
|
|
\constfunc{const wxWCharBuffer}{cMB2WX}{\param{const char* }{psz}}
|
|
|
|
|
|
|
|
Converts from multibyte encoding to the current wxChar type
|
|
|
|
(which depends on whether wxUSE\_UNICODE is set to 1). If wxChar is char,
|
|
|
|
it returns the parameter unaltered. If wxChar is wchar\_t, it returns the
|
|
|
|
result in a wxWCharBuffer. The macro wxMB2WXbuf is defined as the correct
|
|
|
|
return type (without const).
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
\membersection{wxMBConv::cWX2MB}\label{wxmbconvcwx2mb}
|
|
|
|
|
|
|
|
\constfunc{const char*}{cWX2MB}{\param{const wxChar* }{psz}}
|
|
|
|
|
|
|
|
\constfunc{const wxCharBuffer}{cWX2MB}{\param{const wxChar* }{psz}}
|
|
|
|
|
|
|
|
Converts from the current wxChar type to multibyte encoding. If wxChar is char,
|
|
|
|
it returns the parameter unaltered. If wxChar is wchar\_t, it returns the
|
|
|
|
result in a wxCharBuffer. The macro wxWX2MBbuf is defined as the correct
|
|
|
|
return type (without const).
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
\membersection{wxMBConv::cWC2WX}\label{wxmbconvcwc2wx}
|
|
|
|
|
|
|
|
\constfunc{const wchar\_t*}{cWC2WX}{\param{const wchar\_t* }{psz}}
|
|
|
|
|
|
|
|
\constfunc{const wxCharBuffer}{cWC2WX}{\param{const wchar\_t* }{psz}}
|
|
|
|
|
|
|
|
Converts from Unicode to the current wxChar type. If wxChar is wchar\_t,
|
|
|
|
it returns the parameter unaltered. If wxChar is char, it returns the
|
|
|
|
result in a wxCharBuffer. The macro wxWC2WXbuf is defined as the correct
|
|
|
|
return type (without const).
|
|
|
|
|
2006-04-01 12:43:03 +00:00
|
|
|
|
2000-07-15 19:51:35 +00:00
|
|
|
\membersection{wxMBConv::cWX2WC}\label{wxmbconvcwx2wc}
|
|
|
|
|
|
|
|
\constfunc{const wchar\_t*}{cWX2WC}{\param{const wxChar* }{psz}}
|
|
|
|
|
|
|
|
\constfunc{const wxWCharBuffer}{cWX2WC}{\param{const wxChar* }{psz}}
|
|
|
|
|
|
|
|
Converts from the current wxChar type to Unicode. If wxChar is wchar\_t,
|
|
|
|
it returns the parameter unaltered. If wxChar is char, it returns the
|
|
|
|
result in a wxWCharBuffer. The macro wxWX2WCbuf is defined as the correct
|
|
|
|
return type (without const).
|
|
|
|
|
2006-04-04 07:49:08 +00:00
|
|
|
|
|
|
|
\membersection{wxMBConv::GetMBNulLen}\label{wxmbconvgetmbnullen}
|
|
|
|
|
|
|
|
\constfunc{size\_t}{GetMBNulLen}{\void}
|
|
|
|
|
|
|
|
This function returns $1$ for most of the multibyte encodings in which the
|
|
|
|
string is terminated by a single \NUL, $2$ for UTF-16 and $4$ for UTF-32 for
|
|
|
|
which the string is terminated with $2$ and $4$ \NUL characters respectively.
|
|
|
|
The other cases are not currently supported and $-1$ is returned for them.
|
|
|
|
|
|
|
|
|