diff --git a/include/wx/convauto.h b/include/wx/convauto.h
index 4c18dba601..25019a55e5 100644
--- a/include/wx/convauto.h
+++ b/include/wx/convauto.h
@@ -84,6 +84,10 @@ public:
     // return the BOM type of this buffer
     static wxBOM DetectBOM(const char *src, size_t srcLen);
 
+    // return the characters composing the given BOM; the result is not
+    // NUL-terminated, the number of characters is stored in *count
+    static const char* GetBOMChars(wxBOM bom, size_t* count);
+
     wxBOM GetBOM() const
     {
         return m_bomType;
diff --git a/interface/wx/convauto.h b/interface/wx/convauto.h
index 715d06c279..d4d3919c4d 100644
--- a/interface/wx/convauto.h
+++ b/interface/wx/convauto.h
@@ -147,6 +147,26 @@ public:
     */
     wxBOM GetBOM() const;
 
+    /**
+        Return a pointer to the characters that make up the given BOM.
+
+        The returned character count is 2, 3 or 4, or undefined if the return
+        value is NULL.
+
+        @param bom
+            A valid BOM type, i.e. not wxBOM_Unknown or wxBOM_None.
+        @param count
+            A non-@NULL pointer receiving the number of characters in this BOM.
+        @return
+            Pointer to characters composing the BOM or @NULL if BOM is unknown
+            or invalid. Notice that the returned string is not NUL-terminated
+            and may contain embedded NULs so @a count must be used to handle it
+            correctly.
+
+        @since 2.9.3
+    */
+    static const char* GetBOMChars(wxBOM bom, size_t* count);
+
     /**
         Disable the use of the fall back encoding: if the input doesn't have a
         BOM and is not valid UTF-8, the conversion will fail.
diff --git a/src/common/convauto.cpp b/src/common/convauto.cpp
index 7480754bb6..3fcccd849c 100644
--- a/src/common/convauto.cpp
+++ b/src/common/convauto.cpp
@@ -31,6 +31,20 @@
 // seem to be a good idea and there is no other reasonable alternative
 wxFontEncoding wxConvAuto::ms_defaultMBEncoding = wxFONTENCODING_ISO8859_1;
 
+namespace
+{
+
+// Byte sequences of the supported BOMs. These are plain char arrays, not
+// string literals: they are not NUL-terminated and the UTF-32 ones contain
+// embedded NULs, so their length must always be taken with WXSIZEOF().
+const char BOM_UTF32BE[] = { '\x00', '\x00', '\xFE', '\xFF' };
+const char BOM_UTF32LE[] = { '\xFF', '\xFE', '\x00', '\x00' };
+const char BOM_UTF16BE[] = { '\xFE', '\xFF' };
+const char BOM_UTF16LE[] = { '\xFF', '\xFE' };
+const char BOM_UTF8[] = { '\xEF', '\xBB', '\xBF' };
+
+} // anonymous namespace
+
 // ============================================================================
 // implementation
 // ============================================================================
@@ -44,6 +58,38 @@ void wxConvAuto::SetFallbackEncoding(wxFontEncoding enc)
     ms_defaultMBEncoding = enc;
 }
 
+// Return a pointer to the bytes of the given BOM and store their number in
+// *count. The result is not NUL-terminated.
+//
+// Returns NULL if count is NULL or if bom is not one of the concrete BOM
+// types; in the latter case *count is set to 0.
+/* static */
+const char* wxConvAuto::GetBOMChars(wxBOM bom, size_t* count)
+{
+    wxCHECK_MSG( count , NULL, wxS("count pointer must be provided") );
+
+    // Don't leave the output parameter uninitialized on the failure paths.
+    *count = 0;
+
+    // There is no default label: a wxBOM value not listed here ends up at the
+    // "Unknown BOM type" failure after the switch.
+    switch ( bom )
+    {
+        case wxBOM_UTF32BE: *count = WXSIZEOF(BOM_UTF32BE); return BOM_UTF32BE;
+        case wxBOM_UTF32LE: *count = WXSIZEOF(BOM_UTF32LE); return BOM_UTF32LE;
+        case wxBOM_UTF16BE: *count = WXSIZEOF(BOM_UTF16BE); return BOM_UTF16BE;
+        case wxBOM_UTF16LE: *count = WXSIZEOF(BOM_UTF16LE); return BOM_UTF16LE;
+        case wxBOM_UTF8   : *count = WXSIZEOF(BOM_UTF8   ); return BOM_UTF8;
+        case wxBOM_Unknown:
+        case wxBOM_None:
+            wxFAIL_MSG( wxS("Invalid BOM type") );
+            return NULL;
+    }
+
+    wxFAIL_MSG( wxS("Unknown BOM type") );
+    return NULL;
+}
+
 /* static */
 wxBOM wxConvAuto::DetectBOM(const char *src, size_t srcLen)
 {