1. bug in wxString::find_first_of() fixed
2. new wxStringTokenizer class and the docs for it git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@5766 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
parent
54744d3ab7
commit
bbf8fc5391
@ -2,6 +2,36 @@
|
||||
|
||||
wxStringTokenizer helps you to break a string up into a number of tokens.
|
||||
|
||||
To use this class, you should create a wxStringTokenizer object, give it the
|
||||
string to tokenize and also the delimiters which separate tokens in the string
|
||||
(by default, white space characters will be used).
|
||||
|
||||
Then \helpref{GetNextToken}{wxstringtokenizergetnexttoken} may be called
|
||||
repeatedly until \helpref{HasMoreTokens}{wxstringtokenizerhasmoretokens}
|
||||
returns FALSE.
|
||||
|
||||
For example:
|
||||
|
||||
\begin{verbatim}
|
||||
|
||||
wxStringTokenizer tkz("first:second:third::fifth", ":");
|
||||
while ( tkz.HasMoreTokens() )
|
||||
{
|
||||
wxString token = tkz.GetNextToken();
|
||||
|
||||
// process token here
|
||||
}
|
||||
\end{verbatim}
|
||||
|
||||
Another feature of this class is that it may return each token together with
|
||||
the delimiter which followed it. In a simple case like above, you are not
|
||||
interested in this because the delimiter is always {\tt ':'}, but if the
|
||||
delimiters string has several characters, you might need to know which of them
|
||||
follows the current token. In this case, pass {\tt TRUE} to the wxStringTokenizer
|
||||
constructor or \helpref{SetString}{wxstringtokenizersetstring} method and
|
||||
the delimiter will be appended to each returned token (except for the last
|
||||
one).
|
||||
|
||||
\wxheading{Derived from}
|
||||
|
||||
\helpref{wxObject}{wxobject}
|
||||
@ -21,7 +51,7 @@ Default constructor.
|
||||
\func{}{wxStringTokenizer}{\param{const wxString\& }{to\_tokenize}, \param{const wxString\& }{delims = " $\backslash$t$\backslash$r$\backslash$n"}, \param{bool }{ret\_delim = FALSE}}
|
||||
|
||||
Constructor. Pass the string to tokenize, a string containing delimiters,
|
||||
a flag specifying whether delimiters are retained.
|
||||
and a flag specifying whether to return delimiters with tokens.
|
||||
|
||||
\membersection{wxStringTokenizer::\destruct{wxStringTokenizer}}\label{wxstringtokenizerdtor}
|
||||
|
||||
@ -45,13 +75,21 @@ Returns TRUE if the tokenizer has further tokens.
|
||||
|
||||
\constfunc{wxString}{GetNextToken}{\void}
|
||||
|
||||
Returns the next token.
|
||||
Returns the next token or an empty string if the end of the string was reached.
|
||||
|
||||
\membersection{wxStringTokenizer::GetPosition}\label{wxstringtokenizergetposition}
|
||||
|
||||
\constfunc{size\_t}{GetPosition}{\void}
|
||||
|
||||
Returns the current position (i.e. one index after the last returned
|
||||
token or 0 if GetNextToken() has never been called) in the original
|
||||
string.
|
||||
|
||||
\membersection{wxStringTokenizer::GetString}\label{wxstringtokenizergetstring}
|
||||
|
||||
\constfunc{wxString}{GetString}{\void}
|
||||
|
||||
Returns the input string.
|
||||
Returns the part of the original string which remains after all the tokens already extracted have been removed.
|
||||
|
||||
\membersection{wxStringTokenizer::SetString}\label{wxstringtokenizersetstring}
|
||||
|
||||
@ -60,5 +98,5 @@ Returns the input string.
|
||||
Initializes the tokenizer.
|
||||
|
||||
Pass the string to tokenize, a string containing delimiters,
|
||||
a flag specifying whether delimiters are retained.
|
||||
and a flag specifying whether to return delimiters with tokens.
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
// Name: tokenzr.h
|
||||
// Purpose: String tokenizer
|
||||
// Author: Guilhem Lavaux
|
||||
// Modified by: Gregory Pietsch
|
||||
// Modified by: Vadim Zeitlin
|
||||
// Created: 04/22/98
|
||||
// RCS-ID: $Id$
|
||||
// Copyright: (c) Guilhem Lavaux
|
||||
@ -18,47 +18,49 @@
|
||||
|
||||
#include "wx/object.h"
|
||||
#include "wx/string.h"
|
||||
#include "wx/filefn.h"
|
||||
|
||||
// default: delimiters are usual white space characters
|
||||
#define wxDEFAULT_DELIMITERS (_T(" \t\r\n"))
|
||||
|
||||
class WXDLLEXPORT wxStringTokenizer : public wxObject
|
||||
{
|
||||
public:
|
||||
wxStringTokenizer(const wxString& to_tokenize,
|
||||
const wxString& delims = " \t\r\n",
|
||||
bool ret_delim = FALSE);
|
||||
// ctors and such
|
||||
wxStringTokenizer() { m_retdelims = FALSE; m_pos = 0; }
|
||||
wxStringTokenizer(const wxString& to_tokenize,
|
||||
const wxString& delims = wxDEFAULT_DELIMITERS,
|
||||
bool ret_delim = FALSE);
|
||||
void SetString(const wxString& to_tokenize,
|
||||
const wxString& delims = wxDEFAULT_DELIMITERS,
|
||||
bool ret_delim = FALSE);
|
||||
virtual ~wxStringTokenizer();
|
||||
|
||||
int CountTokens() const;
|
||||
bool HasMoreTokens();
|
||||
// count tokens/get next token
|
||||
size_t CountTokens() const;
|
||||
bool HasMoreTokens() { return m_hasMore; }
|
||||
wxString GetNextToken();
|
||||
|
||||
wxString NextToken();
|
||||
wxString GetNextToken() { return NextToken(); };
|
||||
|
||||
wxString GetString() const { return m_string; }
|
||||
// One note about GetString -- it returns the string
|
||||
// remaining after the previous tokens have been removed,
|
||||
// not the original string
|
||||
wxString GetString() const { return m_string; }
|
||||
|
||||
void SetString(const wxString& to_tokenize,
|
||||
const wxString& delims = " \t\r\n",
|
||||
bool ret_delim = FALSE)
|
||||
{
|
||||
m_string = to_tokenize;
|
||||
m_delims = delims;
|
||||
m_retdelims = ret_delim;
|
||||
m_pos = 0;
|
||||
}
|
||||
// returns the current position (i.e. one index after the last returned
|
||||
// token or 0 if GetNextToken() has never been called) in the original
|
||||
// string
|
||||
size_t GetPosition() const { return m_pos; }
|
||||
|
||||
// Here's the desired function. It returns the position
|
||||
// of the next token in the original string by keeping track
|
||||
// of everything that's been deleted by GetNextToken.
|
||||
wxUint32 GetPosition() { return m_pos; }
|
||||
// for compatibility only, use GetNextToken() instead
|
||||
wxString NextToken() { return GetNextToken(); }
|
||||
|
||||
protected:
|
||||
wxString m_string, m_delims;
|
||||
bool m_retdelims;
|
||||
wxUint32 m_pos; // the position
|
||||
wxString m_string, // the (rest of) string to tokenize
|
||||
m_delims; // all delimiters
|
||||
|
||||
size_t m_pos; // the position in the original string
|
||||
|
||||
bool m_retdelims; // if TRUE, return delims with tokens
|
||||
bool m_hasMore; // do we have more tokens?
|
||||
};
|
||||
|
||||
#endif // _WX_TOKENZRH
|
||||
|
@ -29,16 +29,16 @@
|
||||
|
||||
// what to test?
|
||||
|
||||
#define TEST_ARRAYS
|
||||
#define TEST_CMDLINE
|
||||
#define TEST_DIR
|
||||
#define TEST_EXECUTE
|
||||
#define TEST_LOG
|
||||
#define TEST_LONGLONG
|
||||
#define TEST_MIME
|
||||
//#define TEST_ARRAYS
|
||||
//#define TEST_CMDLINE
|
||||
//#define TEST_DIR
|
||||
//#define TEST_EXECUTE
|
||||
//#define TEST_LOG
|
||||
//#define TEST_LONGLONG
|
||||
//#define TEST_MIME
|
||||
#define TEST_STRINGS
|
||||
#define TEST_THREADS
|
||||
#define TEST_TIME
|
||||
//#define TEST_THREADS
|
||||
//#define TEST_TIME
|
||||
|
||||
// ============================================================================
|
||||
// implementation
|
||||
@ -1699,6 +1699,7 @@ void PrintArray(const char* name, const wxArrayString& array)
|
||||
#ifdef TEST_STRINGS
|
||||
|
||||
#include "wx/timer.h"
|
||||
#include "wx/tokenzr.h"
|
||||
|
||||
static void TestString()
|
||||
{
|
||||
@ -1826,6 +1827,77 @@ static void TestStringFind()
|
||||
puts("");
|
||||
}
|
||||
|
||||
// replace TABs with \t and CRs with \n
|
||||
static wxString MakePrintable(const wxChar *s)
|
||||
{
|
||||
wxString str(s);
|
||||
(void)str.Replace(_T("\t"), _T("\\t"));
|
||||
(void)str.Replace(_T("\n"), _T("\\n"));
|
||||
(void)str.Replace(_T("\r"), _T("\\r"));
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
static void TestStringTokenizer()
|
||||
{
|
||||
puts("*** Testing wxStringTokenizer ***");
|
||||
|
||||
static const struct StringTokenizerTest
|
||||
{
|
||||
const wxChar *str; // string to tokenize
|
||||
const wxChar *delims; // delimiters to use
|
||||
size_t count; // count of token
|
||||
bool with; // return tokens with delimiters?
|
||||
} tokenizerTestData[] =
|
||||
{
|
||||
{ _T(""), _T(" "), 0, FALSE },
|
||||
{ _T("Hello, world"), _T(" "), 2, FALSE },
|
||||
{ _T("Hello, world"), _T(","), 2, FALSE },
|
||||
{ _T("Hello, world!"), _T(",!"), 3, TRUE },
|
||||
{ _T("username:password:uid:gid:gecos:home:shell"), _T(":"), 7, FALSE },
|
||||
{ _T("1 \t3\t4 6 "), wxDEFAULT_DELIMITERS, 9, TRUE },
|
||||
{ _T("01/02/99"), _T("/-"), 3, FALSE },
|
||||
};
|
||||
|
||||
for ( size_t n = 0; n < WXSIZEOF(tokenizerTestData); n++ )
|
||||
{
|
||||
const StringTokenizerTest& tt = tokenizerTestData[n];
|
||||
wxStringTokenizer tkz(tt.str, tt.delims, tt.with);
|
||||
|
||||
size_t count = tkz.CountTokens();
|
||||
printf(_T("String '%s' has %u tokens delimited by '%s' "),
|
||||
tt.str,
|
||||
count,
|
||||
MakePrintable(tt.delims).c_str());
|
||||
if ( count == tt.count )
|
||||
{
|
||||
puts(_T("(ok)"));
|
||||
}
|
||||
else
|
||||
{
|
||||
printf(_T("(ERROR: should be %u)\n"), tt.count);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// now show the tokens themselves
|
||||
size_t count2 = 0;
|
||||
while ( tkz.HasMoreTokens() )
|
||||
{
|
||||
printf(_T("\ttoken %u: '%s'\n"),
|
||||
++count2,
|
||||
MakePrintable(tkz.GetNextToken()).c_str());
|
||||
}
|
||||
|
||||
if ( count2 != count )
|
||||
{
|
||||
puts(_T("ERROR: token count mismatch"));
|
||||
}
|
||||
}
|
||||
|
||||
puts("");
|
||||
}
|
||||
|
||||
#endif // TEST_STRINGS
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
@ -1889,8 +1961,9 @@ int main(int argc, char **argv)
|
||||
{
|
||||
TestStringSub();
|
||||
TestStringFormat();
|
||||
TestStringFind();
|
||||
}
|
||||
TestStringFind();
|
||||
TestStringTokenizer();
|
||||
#endif // TEST_STRINGS
|
||||
|
||||
#ifdef TEST_ARRAYS
|
||||
|
@ -1596,7 +1596,7 @@ size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
|
||||
const wxChar *start = c_str() + nStart;
|
||||
const wxChar *firstOf = wxStrpbrk(start, sz);
|
||||
if ( firstOf )
|
||||
return firstOf - start;
|
||||
return firstOf - c_str();
|
||||
else
|
||||
return npos;
|
||||
}
|
||||
|
@ -2,13 +2,21 @@
|
||||
// Name: tokenzr.cpp
|
||||
// Purpose: String tokenizer
|
||||
// Author: Guilhem Lavaux
|
||||
// Modified by: Gregory Pietsch
|
||||
// Modified by: Vadim Zeitlin
|
||||
// Created: 04/22/98
|
||||
// RCS-ID: $Id$
|
||||
// Copyright: (c) Guilhem Lavaux
|
||||
// Licence: wxWindows licence
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// ============================================================================
|
||||
// declarations
|
||||
// ============================================================================
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// headers
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
#ifdef __GNUG__
|
||||
#pragma implementation "tokenzr.h"
|
||||
#endif
|
||||
@ -22,86 +30,101 @@
|
||||
|
||||
#include "wx/tokenzr.h"
|
||||
|
||||
// ============================================================================
|
||||
// implementation
|
||||
// ============================================================================
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// wxStringTokenizer construction
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// Construct a tokenizer for the given string: the constructor does nothing
// on its own, it just forwards all of its arguments to SetString().
wxStringTokenizer::wxStringTokenizer(const wxString& to_tokenize,
                                     const wxString& delims,
                                     bool ret_delims)
{
    SetString(to_tokenize, delims, ret_delims);
}
|
||||
|
||||
void wxStringTokenizer::SetString(const wxString& to_tokenize,
|
||||
const wxString& delims,
|
||||
bool ret_delim)
|
||||
{
|
||||
m_string = to_tokenize;
|
||||
m_delims = delims;
|
||||
m_retdelims = ret_delims;
|
||||
m_retdelims = ret_delim;
|
||||
m_pos = 0;
|
||||
|
||||
// empty string doesn't have any tokens
|
||||
m_hasMore = !m_string.empty();
|
||||
}
|
||||
|
||||
wxStringTokenizer::~wxStringTokenizer()
|
||||
{
|
||||
}
|
||||
|
||||
int wxStringTokenizer::CountTokens() const
|
||||
// ----------------------------------------------------------------------------
|
||||
// count the number of tokens in the string
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
size_t wxStringTokenizer::CountTokens() const
|
||||
{
|
||||
size_t pos = 0;
|
||||
int count = 0;
|
||||
bool at_delim;
|
||||
size_t count = 0;
|
||||
for ( ;; )
|
||||
{
|
||||
pos = m_string.find_first_of(m_delims, pos);
|
||||
if ( pos == wxString::npos )
|
||||
break;
|
||||
|
||||
while (pos < m_string.length()) {
|
||||
// while we're still counting ...
|
||||
at_delim = (m_delims.find(m_string.at(pos)) < m_delims.length());
|
||||
// are we at a delimiter? if so, move to the next nondelimiter;
|
||||
// if not, move to the next delimiter. If the find_first_of
|
||||
// and find_first_not_of methods fail, pos will be assigned
|
||||
// npos (0xFFFFFFFF) which will terminate the loop on the next
|
||||
// go-round unless we have a really long string, which is unlikely
|
||||
pos = at_delim ? m_string.find_first_not_of(m_delims, pos)
|
||||
: m_string.find_first_of(m_delims, pos);
|
||||
if (m_retdelims)
|
||||
{
|
||||
// if we're retaining delimiters, increment count
|
||||
count++;
|
||||
}
|
||||
else
|
||||
{
|
||||
// if we're not retaining delimiters and at a token, inc count
|
||||
count += (!at_delim);
|
||||
}
|
||||
count++; // one more token found
|
||||
|
||||
pos++; // skip delimiter
|
||||
}
|
||||
|
||||
// normally, we didn't count the last token in the loop above - so add it
|
||||
// unless the string was empty from the very beginning, in which case it
|
||||
// still has 0 (and not 1) tokens
|
||||
if ( !m_string.empty() )
|
||||
{
|
||||
count++;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
bool wxStringTokenizer::HasMoreTokens()
|
||||
{
|
||||
return (m_retdelims
|
||||
? !m_string.IsEmpty()
|
||||
: m_string.find_first_not_of(m_delims) < m_string.length());
|
||||
}
|
||||
// ----------------------------------------------------------------------------
|
||||
// token extraction
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
wxString wxStringTokenizer::NextToken()
|
||||
wxString wxStringTokenizer::GetNextToken()
|
||||
{
|
||||
size_t pos;
|
||||
wxString r_string;
|
||||
wxString token;
|
||||
if ( HasMoreTokens() )
|
||||
{
|
||||
size_t pos = m_string.find_first_of(m_delims); // end of token
|
||||
size_t pos2; // start of the next one
|
||||
if ( pos != wxString::npos )
|
||||
{
|
||||
// return the delimiter too
|
||||
pos2 = pos + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
pos2 = m_string.length();
|
||||
|
||||
if ( m_string.IsEmpty() )
|
||||
return m_string;
|
||||
pos = m_string.find_first_not_of(m_delims);
|
||||
if ( m_retdelims ) {
|
||||
// we're retaining delimiters (unusual behavior, IMHO)
|
||||
if (pos == 0)
|
||||
// first char is a non-delimiter
|
||||
pos = m_string.find_first_of(m_delims);
|
||||
} else {
|
||||
// we're not retaining delimiters
|
||||
m_string.erase(0, pos);
|
||||
m_pos += pos;
|
||||
if (m_string.IsEmpty())
|
||||
return m_string;
|
||||
pos = m_string.find_first_of(m_delims);
|
||||
// no more tokens in this string
|
||||
m_hasMore = FALSE;
|
||||
}
|
||||
|
||||
token = wxString(m_string, m_retdelims ? pos2 : pos);
|
||||
|
||||
// remove token with the following it delimiter from string
|
||||
m_string.erase(0, pos2);
|
||||
|
||||
// keep track of the position in the original string too
|
||||
m_pos += pos2;
|
||||
}
|
||||
if (pos <= m_string.length()) {
|
||||
r_string = m_string.substr(0, pos);
|
||||
m_string.erase(0, pos);
|
||||
m_pos += pos;
|
||||
} else {
|
||||
r_string = m_string;
|
||||
m_pos += m_string.length();
|
||||
m_string.Empty();
|
||||
}
|
||||
return r_string;
|
||||
//else: no more tokens, return empty token
|
||||
|
||||
return token;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user