1. bug in wxString::find_first_of() fixed

2. new wxStringTokenizer class and the docs for it


git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@5766 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
Vadim Zeitlin 2000-01-31 20:46:49 +00:00
parent 54744d3ab7
commit bbf8fc5391
5 changed files with 237 additions and 101 deletions

View File

@ -2,6 +2,36 @@
wxStringTokenizer helps you to break a string up into a number of tokens.
To use this class, you should create a wxStringTokenizer object, give it the
string to tokenize and also the delimiters which separate tokens in the string
(by default, white space characters will be used).
Then \helpref{GetNextToken}{wxstringtokenizergetnexttoken} may be called
repeatedly until \helpref{HasMoreTokens}{wxstringtokenizerhasmoretokens}
returns FALSE.
For example:
\begin{verbatim}
wxStringTokenizer tkz("first:second:third::fifth", ":");
while ( tkz.HasMoreTokens() )
{
wxString token = tkz.GetNextToken();
// process token here
}
\end{verbatim}
Another feature of this class is that it may return each token together with
the delimiter which was found after it. In a simple case like the one above,
you are not interested in this because the delimiter is always {\tt ':'}, but
if the delimiters string has several characters, you might need to know which
of them follows the current token. In this case, pass {\tt TRUE} as the last
argument of the wxStringTokenizer constructor or of the
\helpref{SetString}{wxstringtokenizersetstring} method and the delimiter will
be appended to each returned token (except for the last one).
\wxheading{Derived from}
\helpref{wxObject}{wxobject}
@ -21,7 +51,7 @@ Default constructor.
\func{}{wxStringTokenizer}{\param{const wxString\& }{to\_tokenize}, \param{const wxString\& }{delims = " $\backslash$t$\backslash$r$\backslash$n"}, \param{bool }{ret\_delim = FALSE}}
Constructor. Pass the string to tokenize, a string containing delimiters,
a flag specifying whether delimiters are retained.
a flag specifying whether to return delimiters with tokens.
\membersection{wxStringTokenizer::\destruct{wxStringTokenizer}}\label{wxstringtokenizerdtor}
@ -45,13 +75,21 @@ Returns TRUE if the tokenizer has further tokens.
\constfunc{wxString}{GetNextToken}{\void}
Returns the next token.
Returns the next token or empty string if the end of string was reached.
\membersection{wxStringTokenizer::GetPosition}\label{wxstringtokenizergetposition}
\constfunc{size\_t}{GetPosition}{\void}
Returns the current position (i.e. one index after the last returned
token or 0 if GetNextToken() has never been called) in the original
string.
\membersection{wxStringTokenizer::GetString}\label{wxstringtokenizergetstring}
\constfunc{wxString}{GetString}{\void}
Returns the input string.
Returns the part of the original string which has not been tokenized yet, i.e. the input string without the tokens already extracted.
\membersection{wxStringTokenizer::SetString}\label{wxstringtokenizersetstring}
@ -60,5 +98,5 @@ Returns the input string.
Initializes the tokenizer.
Pass the string to tokenize, a string containing delimiters,
a flag specifying whether delimiters are retained.
a flag specifying whether to return delimiters with tokens.

View File

@ -2,7 +2,7 @@
// Name: tokenzr.h
// Purpose: String tokenizer
// Author: Guilhem Lavaux
// Modified by: Gregory Pietsch
// Modified by: Vadim Zeitlin
// Created: 04/22/98
// RCS-ID: $Id$
// Copyright: (c) Guilhem Lavaux
@ -18,47 +18,49 @@
#include "wx/object.h"
#include "wx/string.h"
#include "wx/filefn.h"
// default: delimiters are usual white space characters
#define wxDEFAULT_DELIMITERS (_T(" \t\r\n"))
// wxStringTokenizer breaks a string up into tokens separated by any of the
// characters of the delimiters string (white space by default)
class WXDLLEXPORT wxStringTokenizer : public wxObject
{
public:
    // ctors and such

    // default ctor: there is nothing to tokenize until SetString() is
    // called, so m_hasMore must start as FALSE (it is what HasMoreTokens()
    // returns and would be read uninitialized otherwise)
    wxStringTokenizer() { m_retdelims = FALSE; m_pos = 0; m_hasMore = FALSE; }

    // ctor from the string to tokenize, the delimiters and the flag telling
    // whether the delimiters should be returned together with the tokens
    wxStringTokenizer(const wxString& to_tokenize,
                      const wxString& delims = wxDEFAULT_DELIMITERS,
                      bool ret_delim = FALSE);

    // (re)initialize the tokenizer, the parameters are the same as for the
    // ctor above
    void SetString(const wxString& to_tokenize,
                   const wxString& delims = wxDEFAULT_DELIMITERS,
                   bool ret_delim = FALSE);

    virtual ~wxStringTokenizer();

    // count tokens/get next token
    size_t CountTokens() const;
    bool HasMoreTokens() { return m_hasMore; }
    wxString GetNextToken();

    // One note about GetString -- it returns the string
    // remaining after the previous tokens have been removed,
    // not the original string
    wxString GetString() const { return m_string; }

    // returns the current position (i.e. one index after the last returned
    // token or 0 if GetNextToken() has never been called) in the original
    // string
    size_t GetPosition() const { return m_pos; }

    // for compatibility only, use GetNextToken() instead
    wxString NextToken() { return GetNextToken(); }

protected:
    wxString m_string,              // the (rest of) string to tokenize
             m_delims;              // all delimiters

    size_t   m_pos;                 // the position in the original string

    bool     m_retdelims;           // if TRUE, return delims with tokens
    bool     m_hasMore;             // do we have more tokens?
};
#endif // _WX_TOKENZRH

View File

@ -29,16 +29,16 @@
// what to test?
#define TEST_ARRAYS
#define TEST_CMDLINE
#define TEST_DIR
#define TEST_EXECUTE
#define TEST_LOG
#define TEST_LONGLONG
#define TEST_MIME
//#define TEST_ARRAYS
//#define TEST_CMDLINE
//#define TEST_DIR
//#define TEST_EXECUTE
//#define TEST_LOG
//#define TEST_LONGLONG
//#define TEST_MIME
#define TEST_STRINGS
#define TEST_THREADS
#define TEST_TIME
//#define TEST_THREADS
//#define TEST_TIME
// ============================================================================
// implementation
@ -1699,6 +1699,7 @@ void PrintArray(const char* name, const wxArrayString& array)
#ifdef TEST_STRINGS
#include "wx/timer.h"
#include "wx/tokenzr.h"
static void TestString()
{
@ -1826,6 +1827,77 @@ static void TestStringFind()
puts("");
}
// Returns a copy of s in which the TAB, LF and CR characters are replaced
// with the two-character sequences "\t", "\n" and "\r" respectively so that
// they are visible in the test output
static wxString MakePrintable(const wxChar *s)
{
    // each control character together with its printable representation
    static const struct
    {
        const wxChar *raw;
        const wxChar *shown;
    } escapes[] =
    {
        { _T("\t"), _T("\\t") },
        { _T("\n"), _T("\\n") },
        { _T("\r"), _T("\\r") },
    };

    wxString printable(s);
    for ( size_t i = 0; i < sizeof(escapes)/sizeof(escapes[0]); i++ )
    {
        // the number of replacements made is of no interest here
        printable.Replace(escapes[i].raw, escapes[i].shown);
    }

    return printable;
}
// Runs wxStringTokenizer over a table of test cases: for each of them the
// value returned by CountTokens() is compared with the expected number of
// tokens and, if they agree, all tokens are extracted and printed and their
// number is checked against CountTokens() result once again.
static void TestStringTokenizer()
{
    puts("*** Testing wxStringTokenizer ***");

    static const struct StringTokenizerTest
    {
        const wxChar *str;      // string to tokenize
        const wxChar *delims;   // delimiters to use
        size_t        count;    // expected number of tokens
        bool          with;     // return tokens with delimiters?
    } tokenizerTestData[] =
    {
        { _T(""), _T(" "), 0, FALSE },
        { _T("Hello, world"), _T(" "), 2, FALSE },
        { _T("Hello, world"), _T(","), 2, FALSE },
        { _T("Hello, world!"), _T(",!"), 3, TRUE },
        { _T("username:password:uid:gid:gecos:home:shell"), _T(":"), 7, FALSE },
        // NOTE(review): 9 tokens require 8 delimiter characters but only 5
        // are visible in this literal -- check that runs of consecutive
        // spaces in it have not been collapsed
        { _T("1 \t3\t4 6 "), wxDEFAULT_DELIMITERS, 9, TRUE },
        { _T("01/02/99"), _T("/-"), 3, FALSE },
    };

    for ( size_t n = 0; n < WXSIZEOF(tokenizerTestData); n++ )
    {
        const StringTokenizerTest& tt = tokenizerTestData[n];
        wxStringTokenizer tkz(tt.str, tt.delims, tt.with);

        // "%u" expects an unsigned int argument and size_t may be a wider
        // type, so all counts are explicitly converted before being passed
        // to printf()
        size_t count = tkz.CountTokens();
        printf(_T("String '%s' has %u tokens delimited by '%s' "),
               tt.str,
               static_cast<unsigned int>(count),
               MakePrintable(tt.delims).c_str());
        if ( count == tt.count )
        {
            puts(_T("(ok)"));
        }
        else
        {
            printf(_T("(ERROR: should be %u)\n"),
                   static_cast<unsigned int>(tt.count));

            // no sense in showing the tokens if even their number is wrong
            continue;
        }

        // now show the tokens themselves
        size_t count2 = 0;
        while ( tkz.HasMoreTokens() )
        {
            printf(_T("\ttoken %u: '%s'\n"),
                   static_cast<unsigned int>(++count2),
                   MakePrintable(tkz.GetNextToken()).c_str());
        }

        // the number of tokens really extracted must agree with the one
        // returned by CountTokens()
        if ( count2 != count )
        {
            puts(_T("ERROR: token count mismatch"));
        }
    }

    puts("");
}
#endif // TEST_STRINGS
// ----------------------------------------------------------------------------
@ -1889,8 +1961,9 @@ int main(int argc, char **argv)
{
TestStringSub();
TestStringFormat();
TestStringFind();
}
TestStringFind();
TestStringTokenizer();
#endif // TEST_STRINGS
#ifdef TEST_ARRAYS

View File

@ -1596,7 +1596,7 @@ size_t wxString::find_first_of(const wxChar* sz, size_t nStart) const
const wxChar *start = c_str() + nStart;
const wxChar *firstOf = wxStrpbrk(start, sz);
if ( firstOf )
return firstOf - start;
return firstOf - c_str();
else
return npos;
}

View File

@ -2,13 +2,21 @@
// Name: tokenzr.cpp
// Purpose: String tokenizer
// Author: Guilhem Lavaux
// Modified by: Gregory Pietsch
// Modified by: Vadim Zeitlin
// Created: 04/22/98
// RCS-ID: $Id$
// Copyright: (c) Guilhem Lavaux
// Licence: wxWindows licence
/////////////////////////////////////////////////////////////////////////////
// ============================================================================
// declarations
// ============================================================================
// ----------------------------------------------------------------------------
// headers
// ----------------------------------------------------------------------------
#ifdef __GNUG__
#pragma implementation "tokenzr.h"
#endif
@ -22,86 +30,101 @@
#include "wx/tokenzr.h"
// ============================================================================
// implementation
// ============================================================================
// ----------------------------------------------------------------------------
// wxStringTokenizer construction
// ----------------------------------------------------------------------------
// Creates the tokenizer for the given string: all the work is delegated to
// SetString() so that the object is initialized in exactly the same way as
// when it is reused for another string later.
wxStringTokenizer::wxStringTokenizer(const wxString& to_tokenize,
                                     const wxString& delims,
                                     bool ret_delims)
{
    this->SetString(to_tokenize, delims, ret_delims);
}
// (Re)initializes the tokenizer.
//
// to_tokenize is the string to break up into tokens, delims contains all
// the characters which separate the tokens and ret_delim tells whether the
// delimiter following a token should be returned together with it.
void wxStringTokenizer::SetString(const wxString& to_tokenize,
                                  const wxString& delims,
                                  bool ret_delim)
{
    m_string = to_tokenize;
    m_delims = delims;
    m_retdelims = ret_delim;

    // nothing has been extracted from the new string yet
    m_pos = 0;

    // empty string doesn't have any tokens
    m_hasMore = !m_string.empty();
}
// Nothing to do here: this destructor performs no explicit cleanup.
wxStringTokenizer::~wxStringTokenizer() { }
int wxStringTokenizer::CountTokens() const
// ----------------------------------------------------------------------------
// count the number of tokens in the string
// ----------------------------------------------------------------------------
size_t wxStringTokenizer::CountTokens() const
{
size_t pos = 0;
int count = 0;
bool at_delim;
size_t count = 0;
for ( ;; )
{
pos = m_string.find_first_of(m_delims, pos);
if ( pos == wxString::npos )
break;
while (pos < m_string.length()) {
// while we're still counting ...
at_delim = (m_delims.find(m_string.at(pos)) < m_delims.length());
// are we at a delimiter? if so, move to the next nondelimiter;
// if not, move to the next delimiter. If the find_first_of
// and find_first_not_of methods fail, pos will be assigned
// npos (0xFFFFFFFF) which will terminate the loop on the next
// go-round unless we have a really long string, which is unlikely
pos = at_delim ? m_string.find_first_not_of(m_delims, pos)
: m_string.find_first_of(m_delims, pos);
if (m_retdelims)
{
// if we're retaining delimiters, increment count
count++;
}
else
{
// if we're not retaining delimiters and at a token, inc count
count += (!at_delim);
}
count++; // one more token found
pos++; // skip delimiter
}
// normally, we didn't count the last token in the loop above - so add it
// unless the string was empty from the very beginning, in which case it
// still has 0 (and not 1) tokens
if ( !m_string.empty() )
{
count++;
}
return count;
}
// Tells whether anything is left to be extracted from the string.
//
// When the delimiters are returned with the tokens, any non-empty rest of
// the string counts; otherwise at least one non-delimiter character must
// remain in it.
//
// NOTE(review): the class declaration shown in tokenzr.h also contains an
// inline HasMoreTokens() returning m_hasMore -- confirm which of the two
// definitions is meant to be kept.
bool wxStringTokenizer::HasMoreTokens()
{
    if ( m_retdelims )
    {
        return !m_string.IsEmpty();
    }

    return m_string.find_first_not_of(m_delims) < m_string.length();
}
// ----------------------------------------------------------------------------
// token extraction
// ----------------------------------------------------------------------------
// Extracts and returns the next token, or an empty string if there are no
// more tokens.
//
// The token runs up to the first delimiter found in the rest of the string
// (or up to its end if there are no more delimiters); the delimiter itself
// is included in the returned token only if m_retdelims is TRUE. The token
// and the delimiter following it are removed from m_string and m_pos is
// advanced accordingly.
wxString wxStringTokenizer::GetNextToken()
{
    wxString token;
    if ( HasMoreTokens() )
    {
        size_t pos = m_string.find_first_of(m_delims); // end of token
        size_t pos2;                                   // start of the next one
        if ( pos != wxString::npos )
        {
            // the next token starts just after the delimiter
            pos2 = pos + 1;
        }
        else
        {
            // no delimiter found: the token is the entire rest of the string
            pos2 = m_string.length();

            // no more tokens in this string
            m_hasMore = FALSE;
        }

        // pos is npos if no delimiter was found, in which case the whole
        // remaining string is taken
        token = wxString(m_string, m_retdelims ? pos2 : pos);

        // remove the token together with the delimiter following it from
        // the string
        m_string.erase(0, pos2);

        // keep track of the position in the original string too
        m_pos += pos2;
    }
    //else: no more tokens, return empty token

    return token;
}