2008-03-08 13:52:38 +00:00
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Name: regex.h
|
2008-03-10 15:24:38 +00:00
|
|
|
// Purpose: interface of wxRegEx
|
2008-03-08 13:52:38 +00:00
|
|
|
// Author: wxWidgets team
|
|
|
|
// RCS-ID: $Id$
|
|
|
|
// Licence: wxWindows license
|
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
/**
|
|
|
|
@class wxRegEx
|
|
|
|
@wxheader{regex.h}
|
2008-03-08 14:43:31 +00:00
|
|
|
|
2008-03-08 13:52:38 +00:00
|
|
|
wxRegEx represents a regular expression. This class provides support
|
|
|
|
for regular expressions matching and also replacement.
|
2008-03-08 14:43:31 +00:00
|
|
|
|
2008-03-08 13:52:38 +00:00
|
|
|
It is built on top of either the system library (if it has support
|
|
|
|
for POSIX regular expressions - which is the case for most modern
|
|
|
|
Unices) or uses the built in Henry Spencer's library. Henry Spencer
|
|
|
|
would appreciate being given credit in the documentation of software
|
|
|
|
which uses his library, but that is not a requirement.
|
2008-03-08 14:43:31 +00:00
|
|
|
|
2008-03-08 13:52:38 +00:00
|
|
|
Regular expressions, as defined by POSIX, come in two flavours: @e extended
|
|
|
|
and @e basic. The builtin library also adds a third flavour
|
2008-03-10 15:24:38 +00:00
|
|
|
of expression, @e advanced, which is not available
|
2008-03-08 13:52:38 +00:00
|
|
|
when using the system library.
|
2008-03-08 14:43:31 +00:00
|
|
|
|
2008-03-08 13:52:38 +00:00
|
|
|
Unicode is fully supported only when using the builtin library.
|
|
|
|
When using the system library in Unicode mode, the expressions and data
|
|
|
|
are translated to the default 8-bit encoding before being passed to
|
|
|
|
the library.
|
2008-03-08 14:43:31 +00:00
|
|
|
|
2008-03-08 13:52:38 +00:00
|
|
|
On platforms where a system library is available, the default is to use
|
|
|
|
the builtin library for Unicode builds, and the system library otherwise.
|
|
|
|
It is possible to use the other if preferred by selecting it when building
|
|
|
|
wxWidgets.
|
2008-03-08 14:43:31 +00:00
|
|
|
|
2008-03-08 13:52:38 +00:00
|
|
|
@library{wxbase}
|
|
|
|
@category{data}
|
2008-03-08 14:43:31 +00:00
|
|
|
|
2008-03-10 15:24:38 +00:00
|
|
|
@see wxRegEx::ReplaceFirst
|
2008-03-08 13:52:38 +00:00
|
|
|
*/
|
2008-03-08 14:43:31 +00:00
|
|
|
class wxRegEx
|
2008-03-08 13:52:38 +00:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
//@{
|
|
|
|
/**
|
2008-03-08 14:43:31 +00:00
|
|
|
Create and compile the regular expression, use
|
2008-03-08 13:52:38 +00:00
|
|
|
IsValid() to test for compilation errors.
|
|
|
|
*/
|
|
|
|
wxRegEx();
|
2008-03-08 14:43:31 +00:00
|
|
|
wxRegEx(const wxString& expr, int flags = wxRE_DEFAULT);
|
2008-03-08 13:52:38 +00:00
|
|
|
//@}
|
|
|
|
|
|
|
|
/**
|
|
|
|
dtor not virtual, don't derive from this class
|
|
|
|
*/
|
|
|
|
~wxRegEx();
|
|
|
|
|
|
|
|
/**
|
2008-03-08 14:43:31 +00:00
|
|
|
Compile the string into regular expression, return @true if ok or @false
|
2008-03-08 13:52:38 +00:00
|
|
|
if string has a syntax error.
|
|
|
|
*/
|
|
|
|
bool Compile(const wxString& pattern, int flags = wxRE_DEFAULT);
|
|
|
|
|
|
|
|
//@{
|
|
|
|
/**
|
2008-03-09 12:33:59 +00:00
|
|
|
Returns the part of string corresponding to the match where @a index is
|
2008-03-08 13:52:38 +00:00
|
|
|
interpreted as above. Empty string is returned if match failed
|
2008-03-08 14:43:31 +00:00
|
|
|
May only be called after successful call to Matches()
|
|
|
|
and only if @c wxRE_NOSUB was @b not used in
|
2008-03-08 13:52:38 +00:00
|
|
|
Compile().
|
|
|
|
*/
|
2008-03-09 16:24:26 +00:00
|
|
|
bool GetMatch(size_t* start, size_t* len, size_t index = 0) const;
|
|
|
|
const not used in
|
|
|
|
Compile().
|
|
|
|
Returns false if no match or if an error occurred.
|
|
|
|
wxString GetMatch(const wxString& text, size_t index = 0) const;
|
2008-03-08 13:52:38 +00:00
|
|
|
//@}
|
|
|
|
|
|
|
|
/**
|
|
|
|
Returns the size of the array of matches, i.e. the number of bracketed
|
|
|
|
subexpressions plus one for the expression itself, or 0 on error.
|
|
|
|
May only be called after successful call to Compile().
|
|
|
|
and only if @c wxRE_NOSUB was @b not used.
|
|
|
|
*/
|
2008-03-09 16:24:26 +00:00
|
|
|
size_t GetMatchCount() const;
|
2008-03-08 13:52:38 +00:00
|
|
|
|
|
|
|
/**
|
2008-03-08 14:43:31 +00:00
|
|
|
Return @true if this is a valid compiled regular expression, @false
|
2008-03-08 13:52:38 +00:00
|
|
|
otherwise.
|
|
|
|
*/
|
2008-03-09 16:24:26 +00:00
|
|
|
bool IsValid() const;
|
2008-03-08 13:52:38 +00:00
|
|
|
|
|
|
|
//@{
|
|
|
|
/**
|
|
|
|
Matches the precompiled regular expression against the string @e text,
|
|
|
|
returns @true if matches and @false otherwise.
|
|
|
|
@e Flags may be combination of @c wxRE_NOTBOL and @c wxRE_NOTEOL.
|
|
|
|
Some regex libraries assume that the text given is null terminated, while
|
|
|
|
others require the length be given as a separate parameter. Therefore for
|
2008-03-09 12:33:59 +00:00
|
|
|
maximum portability assume that @a text cannot contain embedded nulls.
|
2008-03-08 13:52:38 +00:00
|
|
|
When the @e Matches(const wxChar *text, int flags = 0) form is used,
|
|
|
|
a @e wxStrlen() will be done internally if the regex library requires the
|
|
|
|
length. When using @e Matches() in a loop
|
|
|
|
the @e Matches(text, flags, len) form can be used instead, making it
|
|
|
|
possible to avoid a @e wxStrlen() inside the loop.
|
|
|
|
May only be called after successful call to Compile().
|
|
|
|
*/
|
2008-03-09 16:24:26 +00:00
|
|
|
bool Matches(const wxChar* text, int flags = 0) const;
|
|
|
|
const bool Matches(const wxChar* text, int flags, size_t len) const;
|
|
|
|
const bool Matches(const wxString& text, int flags = 0) const;
|
2008-03-08 13:52:38 +00:00
|
|
|
//@}
|
|
|
|
|
|
|
|
/**
|
|
|
|
Replaces the current regular expression in the string pointed to by
|
2008-03-09 12:33:59 +00:00
|
|
|
@e text, with the text in @a replacement and return number of matches
|
2008-03-08 13:52:38 +00:00
|
|
|
replaced (maybe 0 if none found) or -1 on error.
|
|
|
|
The replacement text may contain back references @c \number which will be
|
|
|
|
replaced with the value of the corresponding subexpression in the
|
|
|
|
pattern match. @c \0 corresponds to the entire match and @c is a
|
|
|
|
synonym for it. Backslash may be used to quote itself or @c character.
|
2008-03-09 12:33:59 +00:00
|
|
|
@a maxMatches may be used to limit the number of replacements made, setting
|
2008-03-08 13:52:38 +00:00
|
|
|
it to 1, for example, will only replace first occurrence (if any) of the
|
|
|
|
pattern in the text while default value of 0 means replace all.
|
|
|
|
*/
|
|
|
|
int Replace(wxString* text, const wxString& replacement,
|
2008-03-09 16:24:26 +00:00
|
|
|
size_t maxMatches = 0) const;
|
2008-03-08 13:52:38 +00:00
|
|
|
|
|
|
|
/**
|
2008-03-08 14:43:31 +00:00
|
|
|
Replace all occurrences: this is actually a synonym for
|
2008-03-08 13:52:38 +00:00
|
|
|
Replace().
|
2008-03-20 13:45:17 +00:00
|
|
|
|
2008-03-09 12:33:59 +00:00
|
|
|
@see ReplaceFirst()
|
2008-03-08 13:52:38 +00:00
|
|
|
*/
|
2008-03-09 16:24:26 +00:00
|
|
|
int ReplaceAll(wxString* text, const wxString& replacement) const;
|
2008-03-08 13:52:38 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
Replace the first occurrence.
|
|
|
|
*/
|
2008-03-09 16:24:26 +00:00
|
|
|
int ReplaceFirst(wxString* text, const wxString& replacement) const;
|
2008-03-08 13:52:38 +00:00
|
|
|
};
|
2008-03-10 15:24:38 +00:00
|
|
|
|