fix tokenize

base/str.cc:
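    Fix tokenize so that it behaves correctly when the input string is
    empty, and, when ignoring empty tokens, when the string starts with
    the delimiter (previously the whole string could be returned as a
    single token).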
test/tokentest.cc:
    Clean up the test program's output so that it is easier to see what is
    going on.

--HG--
extra : convert_revision : c7a3db7bc516d3575b1cc4ab7afbd0f1fbe1ec6f
This commit is contained in:
Nathan Binkert 2005-06-22 09:52:02 -04:00
parent 11894d3b4b
commit c95e1281fc
2 changed files with 50 additions and 37 deletions

View file

@ -26,11 +26,10 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <iostream>
#include <string.h>
#include <ctype.h> #include <ctype.h>
#include <cstring>
#include <iostream>
#include <string> #include <string>
#include <vector> #include <vector>
@ -75,17 +74,19 @@ tokenize(vector<string>& v, const string &s, char token, bool ignore)
string::size_type first = 0; string::size_type first = 0;
string::size_type last = s.find_first_of(token); string::size_type last = s.find_first_of(token);
if (ignore) { if (s.empty())
if (last == first) { return;
if (ignore && last == first) {
while (last == first) while (last == first)
last = s.find_first_of(token, ++first); last = s.find_first_of(token, ++first);
if (last == string::npos) { if (last == string::npos) {
v.push_back(s); if (first != s.size())
v.push_back(s.substr(first));
return; return;
} }
} }
}
while (last != string::npos) { while (last != string::npos) {
v.push_back(s.substr(first, last - first)); v.push_back(s.substr(first, last - first));

View file

@ -26,7 +26,7 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <iostream.h> #include <iostream>
#include <string> #include <string>
#include <vector> #include <vector>
@ -35,6 +35,8 @@
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
{ {
using namespace std;
if (argc != 3) { if (argc != 3) {
cout << "Usage: " << argv[0] << " <string> <token>\n"; cout << "Usage: " << argv[0] << " <string> <token>\n";
exit(1); exit(1);
@ -51,18 +53,28 @@ main(int argc, char *argv[])
tokenize(tokens1, test, token, false); tokenize(tokens1, test, token, false);
if (tokens1.size()) { if (tokens1.size()) {
for (i = 0; i < tokens1.size() - 1; i++) int size = tokens1.size();
cout << tokens1[i] << "(" << tokens1[i].size() << "), "; cout << "size = " << size << "\n";
cout << tokens1[i] << "(" << tokens1[i].size() << ")\n"; for (i = 0; i < size; i++) {
cout << "'" << tokens1[i] << "' (" << tokens1[i].size()
<< ")" << ((i == size - 1) ? "\n" : ", ");
}
} else {
cout << "no tokens" << endl;
} }
cout << "testing with ignore\n"; cout << "testing with ignore\n";
tokenize(tokens2, test, token, true); tokenize(tokens2, test, token, true);
if (tokens2.size()) { if (tokens2.size()) {
for (i = 0; i < tokens2.size() - 1; i++) int size = tokens2.size();
cout << tokens2[i] << "(" << tokens2[i].size() << "), "; cout << "size = " << size << "\n";
cout << tokens2[i] << "(" << tokens2[i].size() << ")\n"; for (i = 0; i < size; i++) {
cout << "'" << tokens2[i] << "' (" << tokens2[i].size()
<< ")" << ((i == size - 1) ? "\n" : ", ");
}
} else {
cout << "no tokens" << endl;
} }
return 0; return 0;