Simple STL String Tokenizer Function
January 10, 2005
This function takes an STL string and a string of delimiter characters, and returns a vector of tokens.
#include <string>
#include <vector>

using namespace std;

// Splits `str` into tokens, where a token is a maximal run of characters
// that do not appear in `delimiters` (each character of `delimiters` is a
// separator on its own).
//
// Leading, trailing and repeated delimiters are skipped, so the result never
// contains an empty token; an empty or all-delimiter input yields an empty
// vector.
vector<string> tokenize(const string& str, const string& delimiters)
{
    vector<string> tokens;

    // Index of the first character of the next token; npos once none is left.
    string::size_type tokenStart = str.find_first_not_of(delimiters);
    while (tokenStart != string::npos) {
        // The token ends at the next delimiter. When there is none, tokenEnd
        // is npos and substr() clamps the length to the rest of the string.
        const string::size_type tokenEnd = str.find_first_of(delimiters, tokenStart);
        tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));

        // Step over the delimiter run that follows the token. Searching from
        // npos simply returns npos, which ends the loop.
        tokenStart = str.find_first_not_of(delimiters, tokenEnd);
    }

    return tokens;
}
This is a variation of the function listed in C++ Programming HOW-TO.
Eric Hu posted the following update to retain empty fields between all delimiters. Some comments below say this is buggy, so see Eli's below:
// Splits `str` into fields at every character that appears in `delimiters`,
// keeping empty fields.
//
// Each delimiter character ends exactly one field, so a string containing k
// delimiters always yields k + 1 fields: "a,,b" -> {"a", "", "b"} and
// "a,b," -> {"a", "b", ""}. A leading delimiter produces a leading empty
// field. An empty `str` yields an empty vector (no fields at all).
//
// The earlier version of this function counted delimiter runs and emitted
// one empty field too many between two tokens ("a,,b" gave four fields) and
// none for a single trailing delimiter ("a,b," gave two fields).
std::vector<std::string> tokenize(const std::string& str, const std::string& delimiters)
{
    std::vector<std::string> tokens;
    if (str.empty())
        return tokens;

    // Start index of the field currently being read; always <= str.size().
    std::string::size_type fieldStart = 0;
    for (;;) {
        const std::string::size_type delimPos = str.find_first_of(delimiters, fieldStart);
        if (delimPos == std::string::npos) {
            // No further delimiter: the remainder (possibly empty, when the
            // string ends with a delimiter) is the last field.
            tokens.push_back(str.substr(fieldStart));
            break;
        }
        tokens.push_back(str.substr(fieldStart, delimPos - fieldStart));
        fieldStart = delimPos + 1;
    }

    return tokens;
}
// Here's an alternative to Eric's implementation by Eli.
// Splits `str` into fields at every character that appears in `delimiters`,
// keeping empty fields.
//
// Every delimiter character terminates one field, so k delimiters produce
// k + 1 fields; adjacent, leading and trailing delimiters therefore yield
// empty strings. An empty `str` yields an empty vector.
//
// Only find_first_of() is needed: the text between the cursor and the next
// delimiter is the field, whether or not it is empty. This keeps the split
// to a single pass over the string even for long runs of delimiters.
std::vector<std::string> Tokenize(const std::string& str, const std::string& delimiters)
{
    std::vector<std::string> fields;
    if (str.empty())
        return fields;

    std::string::size_type cursor = 0;  // start of the field being read
    std::string::size_type hit = str.find_first_of(delimiters, cursor);
    while (hit != std::string::npos) {
        fields.push_back(str.substr(cursor, hit - cursor));
        cursor = hit + 1;  // at most str.size(), so substr() below stays valid
        hit = str.find_first_of(delimiters, cursor);
    }

    // Whatever follows the last delimiter (possibly nothing) is the final field.
    fields.push_back(str.substr(cursor));
    return fields;
}
12 Comments