wxWidgets/interface/wx/encconv.h

187 lines
6.9 KiB
C
Raw Normal View History

/////////////////////////////////////////////////////////////////////////////
// Name: encconv.h
// Purpose: interface of wxEncodingConverter
// Author: wxWidgets team
// RCS-ID: $Id$
// Licence: wxWindows license
/////////////////////////////////////////////////////////////////////////////
/**
@class wxEncodingConverter
This class is capable of converting strings between two 8-bit encodings/charsets.
It can also convert from/to Unicode (but only if you compiled wxWidgets
with wxUSE_WCHAR_T set to 1).
Only a limited subset of encodings is supported by wxEncodingConverter:
@c wxFONTENCODING_ISO8859_1..15, @c wxFONTENCODING_CP1250..1257 and
@c wxFONTENCODING_KOI8.
@note
Please use wxMBConv classes instead if possible. wxCSConv has much better
support for various encodings than wxEncodingConverter.
wxEncodingConverter is useful only if you rely on wxCONVERT_SUBSTITUTE mode
of operation (see wxEncodingConverter::Init()).
@library{wxbase}
@category{misc}
@see wxFontMapper, wxMBConv, @ref overview_nonenglish
*/
class wxEncodingConverter : public wxObject
{
public:
/**
Constructor.
*/
wxEncodingConverter();
/**
Return @true if (any text in) multibyte encoding @a encIn can be converted to
another one (@a encOut) losslessly.
Do not call this method with @c wxFONTENCODING_UNICODE as either parameter,
it doesn't make sense (always works in one sense and always depends
on the text to convert in the other).
*/
static bool CanConvert(wxFontEncoding encIn,
wxFontEncoding encOut);
/**
@name Conversion functions
@{
*/
/**
Convert input string according to settings passed to Init() and writes
the result to output.
All the Convert() function overloads return @true if the conversion was
lossless and @false if at least one of the characters couldn't be converted
was and replaced with '?' in the output.
Note that if @c wxCONVERT_SUBSTITUTE was passed to Init(), substitution is
considered a lossless operation.
@note You must call Init() before using this method!
@note wchar_t versions of the method are not available if wxWidgets was
compiled with @c wxUSE_WCHAR_T set to 0.
*/
bool Convert(const char* input, char* output) const;
bool Convert(const wchar_t* input, wchar_t* output) const;
bool Convert(const char* input, wchar_t* output) const;
bool Convert(const wchar_t* input, char* output) const;
/**
Convert input string according to settings passed to Init() in-place,
i.e. write the result to the same memory area.
See the Convert(const char*,char*) const overload for more info.
*/
bool Convert(char* str) const;
bool Convert(wchar_t* str) const;
/**
Convert a wxString and return a new wxString object.
See the Convert(const char*,char*) const overload for more info.
*/
wxString Convert(const wxString& input) const;
//@}
/**
Similar to GetPlatformEquivalents(), but this one will return ALL
equivalent encodings, regardless of the platform, and including itself.
This platform's encodings are before others in the array.
And again, if @a enc is in the array, it is the very first item in it.
*/
static wxFontEncodingArray GetAllEquivalents(wxFontEncoding enc);
/**
Return equivalents for given font that are used under given platform.
Supported platforms:
@li wxPLATFORM_UNIX
@li wxPLATFORM_WINDOWS
@li wxPLATFORM_OS2
@li wxPLATFORM_MAC
@li wxPLATFORM_CURRENT
wxPLATFORM_CURRENT means the platform this binary was compiled for.
Examples:
@verbatim
current platform enc returned value
----------------------------------------------
unix CP1250 {ISO8859_2}
unix ISO8859_2 {ISO8859_2}
windows ISO8859_2 {CP1250}
unix CP1252 {ISO8859_1,ISO8859_15}
@endverbatim
Equivalence is defined in terms of convertibility: two encodings are
equivalent if you can convert text between then without losing
information (it may - and will - happen that you lose special chars
like quotation marks or em-dashes but you shouldn't lose any diacritics
and language-specific characters when converting between equivalent encodings).
Remember that this function does @b NOT check for presence of
fonts in system. It only tells you what are most suitable
encodings. (It usually returns only one encoding.)
@note Note that argument enc itself may be present in the returned array,
so that you can, as a side-effect, detect whether the encoding is
native for this platform or not.
@note Convert() is not limited to converting between equivalent encodings,
it can convert between two arbitrary encodings.
@note If @a enc is present in the returned array, then it is always the first
item of it.
@note Please note that the returned array may contain no items at all.
*/
static wxFontEncodingArray GetPlatformEquivalents(wxFontEncoding enc,
int platform = wxPLATFORM_CURRENT);
/**
Initialize the conversion.
Both output or input encoding may be wxFONTENCODING_UNICODE, but only
if wxUSE_ENCODING is set to 1.
All subsequent calls to Convert() will interpret its argument
as a string in @a input_enc encoding and will output string in
@a output_enc encoding.
You must call this method before calling Convert. You may call
it more than once in order to switch to another conversion.
@a method affects behaviour of Convert() in case input character
cannot be converted because it does not exist in output encoding:
@li @b wxCONVERT_STRICT: follow behaviour of GNU Recode - just copy
unconvertible characters to output and don't change them
(its integer value will stay the same)
@li @b wxCONVERT_SUBSTITUTE: try some (lossy) substitutions - e.g.
replace unconvertible latin capitals with acute by ordinary
capitals, replace en-dash or em-dash by '-' etc.
Both modes guarantee that output string will have same length
as input string.
@return @false if given conversion is impossible, @true otherwise
(conversion may be impossible either if you try to convert
to Unicode with non-Unicode build of wxWidgets or if input
or output encoding is not supported).
*/
bool Init(wxFontEncoding input_enc, wxFontEncoding output_enc,
int method = wxCONVERT_STRICT);
};