2008-03-08 13:52:38 +00:00
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Name: encconv.h
|
2008-03-10 15:24:38 +00:00
|
|
|
// Purpose: interface of wxEncodingConverter
|
2008-03-08 13:52:38 +00:00
|
|
|
// Author: wxWidgets team
|
2010-07-13 13:29:13 +00:00
|
|
|
// Licence: wxWindows licence
|
2008-03-08 13:52:38 +00:00
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
/**
|
|
|
|
@class wxEncodingConverter
|
2008-03-08 14:43:31 +00:00
|
|
|
|
2008-03-28 16:19:12 +00:00
|
|
|
This class is capable of converting strings between two 8-bit encodings/charsets.
|
2010-04-16 10:43:18 +00:00
|
|
|
It can also convert from/to Unicode.
|
2008-03-28 16:19:12 +00:00
|
|
|
|
|
|
|
Only a limited subset of encodings is supported by wxEncodingConverter:
|
2008-03-08 14:43:31 +00:00
|
|
|
@c wxFONTENCODING_ISO8859_1..15, @c wxFONTENCODING_CP1250..1257 and
|
2008-03-08 13:52:38 +00:00
|
|
|
@c wxFONTENCODING_KOI8.
|
2008-03-08 14:43:31 +00:00
|
|
|
|
2008-03-28 16:19:12 +00:00
|
|
|
@note
|
|
|
|
Please use wxMBConv classes instead if possible. wxCSConv has much better
|
|
|
|
support for various encodings than wxEncodingConverter.
|
|
|
|
wxEncodingConverter is useful only if you rely on wxCONVERT_SUBSTITUTE mode
|
|
|
|
of operation (see wxEncodingConverter::Init()).
|
|
|
|
|
2008-03-08 13:52:38 +00:00
|
|
|
@library{wxbase}
|
2009-02-20 11:34:52 +00:00
|
|
|
@category{conv}
|
2008-03-08 14:43:31 +00:00
|
|
|
|
2008-03-28 16:19:12 +00:00
|
|
|
@see wxFontMapper, wxMBConv, @ref overview_nonenglish
|
2008-03-08 13:52:38 +00:00
|
|
|
*/
|
|
|
|
class wxEncodingConverter : public wxObject
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
/**
|
|
|
|
Constructor.
|
|
|
|
*/
|
|
|
|
wxEncodingConverter();
|
|
|
|
|
|
|
|
/**
|
2008-03-09 12:33:59 +00:00
|
|
|
Return @true if (any text in) multibyte encoding @a encIn can be converted to
|
2008-03-28 16:19:12 +00:00
|
|
|
another one (@a encOut) losslessly.
|
|
|
|
|
|
|
|
Do not call this method with @c wxFONTENCODING_UNICODE as either parameter,
|
|
|
|
it doesn't make sense (always works in one sense and always depends
|
2008-03-08 13:52:38 +00:00
|
|
|
on the text to convert in the other).
|
|
|
|
*/
|
|
|
|
static bool CanConvert(wxFontEncoding encIn,
|
|
|
|
wxFontEncoding encOut);
|
|
|
|
|
|
|
|
/**
|
2008-03-28 16:19:12 +00:00
|
|
|
@name Conversion functions
|
|
|
|
|
|
|
|
@{
|
|
|
|
*/
|
|
|
|
/**
|
|
|
|
Convert input string according to settings passed to Init() and write
|
|
|
|
the result to output.
|
|
|
|
|
|
|
|
All the Convert() function overloads return @true if the conversion was
|
|
|
|
lossless and @false if at least one of the characters couldn't be converted
|
|
|
|
and was replaced with '?' in the output.
|
|
|
|
|
|
|
|
Note that if @c wxCONVERT_SUBSTITUTE was passed to Init(), substitution is
|
|
|
|
considered a lossless operation.
|
|
|
|
|
|
|
|
@note You must call Init() before using this method!
|
2008-03-08 13:52:38 +00:00
|
|
|
*/
|
2008-03-09 16:24:26 +00:00
|
|
|
bool Convert(const char* input, char* output) const;
|
2008-03-28 16:19:12 +00:00
|
|
|
bool Convert(const wchar_t* input, wchar_t* output) const;
|
|
|
|
bool Convert(const char* input, wchar_t* output) const;
|
|
|
|
bool Convert(const wchar_t* input, char* output) const;
|
|
|
|
|
|
|
|
/**
|
2012-01-03 23:27:50 +00:00
|
|
|
Convert input string according to settings passed to Init() in-place.
|
|
|
|
|
|
|
|
With this overload, the conversion result is written to the same memory
|
|
|
|
area from which the input is read.
|
2008-03-28 16:19:12 +00:00
|
|
|
|
|
|
|
See the Convert(const char*,char*) const overload for more info.
|
|
|
|
*/
|
|
|
|
bool Convert(char* str) const;
|
2012-01-03 23:27:50 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
Convert input string according to settings passed to Init() in-place.
|
|
|
|
|
|
|
|
With this overload, the conversion result is written to the same memory
|
|
|
|
area from which the input is read.
|
|
|
|
|
|
|
|
See the Convert(const wchar_t*,wchar_t*) const overload for more info.
|
|
|
|
*/
|
2008-03-28 16:19:12 +00:00
|
|
|
bool Convert(wchar_t* str) const;
|
|
|
|
|
|
|
|
/**
|
|
|
|
Convert a wxString and return a new wxString object.
|
|
|
|
|
|
|
|
See the Convert(const char*,char*) const overload for more info.
|
|
|
|
*/
|
|
|
|
wxString Convert(const wxString& input) const;
|
2008-03-08 13:52:38 +00:00
|
|
|
//@}
|
|
|
|
|
2008-03-28 16:19:12 +00:00
|
|
|
|
2008-03-08 13:52:38 +00:00
|
|
|
/**
|
2008-03-28 16:19:12 +00:00
|
|
|
Similar to GetPlatformEquivalents(), but this one will return ALL
|
2008-03-08 13:52:38 +00:00
|
|
|
equivalent encodings, regardless of the platform, and including itself.
|
2008-03-28 16:19:12 +00:00
|
|
|
|
|
|
|
This platform's encodings are before others in the array.
|
|
|
|
And again, if @a enc is in the array, it is the very first item in it.
|
2008-03-08 13:52:38 +00:00
|
|
|
*/
|
|
|
|
static wxFontEncodingArray GetAllEquivalents(wxFontEncoding enc);
|
|
|
|
|
|
|
|
/**
|
2008-03-28 16:19:12 +00:00
|
|
|
Return equivalents for given encoding that are used under given platform.
|
|
|
|
|
|
|
|
Supported platforms:
|
|
|
|
@li wxPLATFORM_UNIX
|
|
|
|
@li wxPLATFORM_WINDOWS
|
|
|
|
@li wxPLATFORM_MAC
|
|
|
|
@li wxPLATFORM_CURRENT
|
|
|
|
|
2008-03-08 13:52:38 +00:00
|
|
|
wxPLATFORM_CURRENT means the platform this binary was compiled for.
|
2008-03-28 16:19:12 +00:00
|
|
|
|
2008-03-08 13:52:38 +00:00
|
|
|
Examples:
|
2008-03-20 13:45:17 +00:00
|
|
|
|
2008-03-28 16:19:12 +00:00
|
|
|
@verbatim
|
|
|
|
current platform enc returned value
|
|
|
|
----------------------------------------------
|
|
|
|
unix CP1250 {ISO8859_2}
|
|
|
|
unix ISO8859_2 {ISO8859_2}
|
|
|
|
windows ISO8859_2 {CP1250}
|
|
|
|
unix CP1252 {ISO8859_1,ISO8859_15}
|
|
|
|
@endverbatim
|
|
|
|
|
|
|
|
Equivalence is defined in terms of convertibility: two encodings are
|
|
|
|
equivalent if you can convert text between them without losing
|
|
|
|
information (it may - and will - happen that you lose special chars
|
|
|
|
like quotation marks or em-dashes but you shouldn't lose any diacritics
|
|
|
|
and language-specific characters when converting between equivalent encodings).
|
|
|
|
|
2008-03-08 13:52:38 +00:00
|
|
|
Remember that this function does @b NOT check for presence of
|
|
|
|
fonts in the system. It only tells you which are the most suitable
|
|
|
|
encodings. (It usually returns only one encoding.)
|
2008-03-28 16:19:12 +00:00
|
|
|
|
|
|
|
@note Argument @a enc itself may be present in the returned array,
|
|
|
|
so that you can, as a side-effect, detect whether the encoding is
|
|
|
|
native for this platform or not.
|
|
|
|
|
|
|
|
@note Convert() is not limited to converting between equivalent encodings,
|
|
|
|
it can convert between two arbitrary encodings.
|
|
|
|
|
|
|
|
@note If @a enc is present in the returned array, then it is always the first
|
|
|
|
item of it.
|
|
|
|
|
|
|
|
@note The returned array may contain no items at all.
|
2008-03-08 13:52:38 +00:00
|
|
|
*/
|
|
|
|
static wxFontEncodingArray GetPlatformEquivalents(wxFontEncoding enc,
|
2008-03-28 16:19:12 +00:00
|
|
|
int platform = wxPLATFORM_CURRENT);
|
2008-03-08 13:52:38 +00:00
|
|
|
|
|
|
|
/**
|
2008-03-28 16:19:12 +00:00
|
|
|
Initialize the conversion.
|
|
|
|
|
|
|
|
Either output or input encoding may be wxFONTENCODING_UNICODE, but only
|
|
|
|
if wxUSE_ENCODING is set to 1.
|
|
|
|
|
|
|
|
All subsequent calls to Convert() will interpret its argument
|
2008-03-09 12:33:59 +00:00
|
|
|
as a string in @a input_enc encoding and will output string in
|
|
|
|
@a output_enc encoding.
|
2008-03-28 16:19:12 +00:00
|
|
|
|
2008-03-08 14:43:31 +00:00
|
|
|
You must call this method before calling Convert(). You may call
|
2008-03-08 13:52:38 +00:00
|
|
|
it more than once in order to switch to another conversion.
|
2008-03-20 13:45:17 +00:00
|
|
|
|
2008-03-28 16:19:12 +00:00
|
|
|
@a method affects behaviour of Convert() in case input character
|
|
|
|
cannot be converted because it does not exist in output encoding:
|
2008-03-20 13:45:17 +00:00
|
|
|
|
2008-03-28 16:19:12 +00:00
|
|
|
@li @b wxCONVERT_STRICT: follow behaviour of GNU Recode - just copy
|
|
|
|
unconvertible characters to output and don't change them
|
|
|
|
(their integer values will stay the same)
|
|
|
|
@li @b wxCONVERT_SUBSTITUTE: try some (lossy) substitutions - e.g.
|
|
|
|
replace unconvertible latin capitals with acute by ordinary
|
|
|
|
capitals, replace en-dash or em-dash by '-' etc.
|
2008-03-20 13:45:17 +00:00
|
|
|
|
2008-03-08 13:52:38 +00:00
|
|
|
Both modes guarantee that output string will have same length
|
|
|
|
as input string.
|
2008-03-28 16:19:12 +00:00
|
|
|
|
|
|
|
@return @false if given conversion is impossible, @true otherwise
|
|
|
|
(conversion may be impossible either if you try to convert
|
|
|
|
to Unicode with non-Unicode build of wxWidgets or if input
|
|
|
|
or output encoding is not supported).
|
2008-03-08 13:52:38 +00:00
|
|
|
*/
|
|
|
|
bool Init(wxFontEncoding input_enc, wxFontEncoding output_enc,
|
|
|
|
int method = wxCONVERT_STRICT);
|
|
|
|
};
|
2008-03-10 15:24:38 +00:00
|
|
|
|