StringUtil.H

Go to the documentation of this file.
00001 /*!@file Util/StringUtil.H */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Util/StringUtil.H $
00035 // $Id: StringUtil.H 14609 2011-03-15 17:48:32Z rand $
00036 //
00037 
00038 #ifndef UTIL_STRINGUTIL_H_DEFINED
00039 #define UTIL_STRINGUTIL_H_DEFINED
00040 
00041 #include <cctype> // for tolower(), toupper()
00042 #include <set>
00043 #include <string>
00044 
/// Break a string into tokens, treating every character of \a delim as a separator.
/** Runs of consecutive delimiter characters count as a single
    separator, and leading/trailing delimiters are ignored, so no
    empty tokens are ever produced. Each token is written through the
    output iterator \a out as a std::string.

    Example:

    \code

    string s = "/:/foo::bar/:baz///";
    vector<string> tokens;
    split(s, ":/", std::back_inserter(tokens));

    // tokens now contains { "foo", "bar", "baz" };

    \endcode

    @param in    the string to tokenize
    @param delim null-terminated set of delimiter characters
    @param out   output iterator receiving one std::string per token
 */
template <class Itr> inline
void split(const std::string& in, const char* delim, Itr out)
{
  typedef std::string::size_type pos_type;

  // Position of the first character that is not a delimiter:
  pos_type tokBegin = in.find_first_not_of(delim);

  for (;;)
    {
      // One past the last character of the current token (npos if
      // the token runs to the end of the string):
      const pos_type tokEnd = in.find_first_of(delim, tokBegin);

      // Nothing left to extract once both searches have come up empty:
      if (tokBegin == std::string::npos && tokEnd == std::string::npos)
        break;

      *out = in.substr(tokBegin, tokEnd - tokBegin);
      ++out;

      // Hop over the delimiter run that follows the token:
      tokBegin = in.find_first_not_of(delim, tokEnd);
    }
}
00080 
/// Concatenate a range of tokens, inserting \a delim between neighbours
/** The delimiter appears only between tokens, never before the first
    or after the last one. An empty range yields an empty string.

    Example:

    \code

    const char* toks[] = { "foo", "bar", "baz" };
    string s = join(&toks[0], &toks[0] + 3, "||");

    // s == "foo||bar||baz";

    \endcode

    @param it    iterator to the first token
    @param stop  iterator one past the last token
    @param delim null-terminated separator text
    @return the joined string
*/
template <class Itr> inline
std::string join(Itr it, Itr stop, const char* delim)
{
  std::string joined;

  if (it == stop)
    return joined;

  // The first token goes in bare; every later one is preceded by
  // the delimiter.
  joined += *it;
  ++it;

  for ( ; it != stop; ++it)
    {
      joined += delim;
      joined += *it;
    }

  return joined;
}
00106 
/// Wrap a sequence of words into a multi-line string, with optional prefix and suffix
/** Words are packed greedily, separated by single spaces, so that the
    text between \a pfx and \a sfx on each line is at most
    (linelength - pfx.length() - sfx.length()) characters wide. If the
    prefix and suffix together leave no room, that width falls back to
    1. Every output line is pfx + words + sfx; lines are separated by
    '\n' and there is no trailing newline.

    A word that is wider than the available width is never split; it
    is placed alone on its own line. An empty input range produces a
    single line consisting of just pfx + sfx.

    Use stdLineWrap() if you have a single string that should be first
    split into words on standard whitespace and then line-wrapped.

    @param itr        iterator to the first word (elements must offer
                      length() and be appendable/assignable to std::string)
    @param stop       iterator one past the last word
    @param linelength target total line width, including pfx and sfx
    @param pfx        text prepended to every output line
    @param sfx        text appended to every output line
    @return the wrapped text */
template <class Itr> inline
std::string lineWrap(Itr itr, Itr stop,
                     size_t linelength,
                     const std::string& pfx = std::string(),
                     const std::string& sfx = std::string())
{
  // Width available for the words themselves on each line:
  const size_t corelen =
    pfx.length() + sfx.length() < linelength
    ? (linelength - pfx.length() - sfx.length())
    : 1;

  std::string out;
  std::string line;

  for ( ; itr != stop; ++itr)
    {
      const size_t wordlen = (*itr).length();

      if (line.empty())
        {
          // Nothing on this line yet, so no separating space is
          // needed and there is nothing to flush. The word goes here
          // even if it is too wide, since it cannot be split; flushing
          // first would only emit a spurious blank line.
          line = *itr;
        }
      else if (line.length() + 1 + wordlen <= corelen)
        {
          // Word plus its separating space still fits:
          line += ' ';
          line += *itr;
        }
      else
        {
          // Line is full: emit it and start a new one with this word.
          out += pfx;
          out += line;
          out += sfx;
          out += '\n';
          line = *itr;
        }
    }

  // Emit the final (possibly empty) line without a trailing newline:
  out += pfx;
  out += line;
  out += sfx;

  return out;
}
00148 
00149 /// Split a string on whitespace, then wrap into a multi-line string with optional prefix and suffix
/** Declaration only; the definition is not part of this header.
    pfx and sfx default to empty strings.
    NOTE(review): linelength, pfx and sfx presumably carry the same
    meaning as the like-named lineWrap() parameters -- confirm against
    the implementation. */
00150 std::string stdLineWrap(const std::string& in, size_t linelength,
00151                         const std::string& pfx = std::string(),
00152                         const std::string& sfx = std::string());
00153 
/// Make a lowercase string out of the input string
/** Each character is converted individually with std::tolower(), so
    the result depends on the current C locale and multi-byte
    encodings are not interpreted; bytes that have no lowercase
    mapping are copied through unchanged.

    @param in the string to convert (not modified)
    @return a lowercased copy of \a in */
inline std::string toLowerCase(const std::string& in)
{
  std::string out = in;
  for (std::string::size_type i = 0; i < out.size(); ++i)
    {
      // std::tolower() requires a value representable as unsigned
      // char (or EOF); passing a negative plain char is undefined
      // behavior, so convert through unsigned char first.
      out[i] = static_cast<char>
        (std::tolower(static_cast<unsigned char>(out[i])));
    }
  return out;
}
00162 
/// Make an uppercase string out of the input string
/** Each character is converted individually with std::toupper(), so
    the result depends on the current C locale and multi-byte
    encodings are not interpreted; bytes that have no uppercase
    mapping are copied through unchanged.

    @param in the string to convert (not modified)
    @return an uppercased copy of \a in */
inline std::string toUpperCase(const std::string& in)
{
  std::string out = in;
  for (std::string::size_type i = 0; i < out.size(); ++i)
    {
      // std::toupper() requires a value representable as unsigned
      // char (or EOF); passing a negative plain char is undefined
      // behavior, so convert through unsigned char first.
      out[i] = static_cast<char>
        (std::toupper(static_cast<unsigned char>(out[i])));
    }
  return out;
}
00171 
00172 /// Compute the Levenshtein distance between two strings
00173 /** http://en.wikipedia.org/wiki/Levenshtein_distance
    Declaration only; the definition is not part of this header.
    @param s first string
    @param t second string
    @return the distance, as an unsigned int */
00174 unsigned int levenshteinDistance(const std::string& s,
00175                                  const std::string& t);
00176 
00177 /// Compute the Damerau-Levenshtein distance between two strings
00178 /** http://en.wikipedia.org/wiki/Damerau-Levenshtein_distance
    Declaration only; the definition is not part of this header.
    @param s first string
    @param t second string
    @return the distance, as an unsigned int */
00179 unsigned int damerauLevenshteinDistance(const std::string& s,
00180                                         const std::string& t);
00181 
00182 /// Convert a CamelCase string to a string of space-separated words
00183 /** @param acronyms If non-null, this set of uppercase acronyms will
00184     be used as a heuristic to find proper word boundaries.
    @param s the CamelCase input string
    @return the converted string, by value

    Declaration only; the definition is not part of this header. Note
    that \a acronyms has no default value, so callers must always pass
    it explicitly. */
00185 std::string camelCaseToSpaces(const std::string& s,
00186                               const std::set<std::string>* acronyms);
00187 
00188 /// Remove whitespace from the beginning and end of the input string
/** Declaration only; the definition is not part of this header. The
    input is taken by const reference and the result is returned by
    value.
    NOTE(review): which characters count as whitespace is decided by
    the implementation -- confirm there. */
00189 std::string trim(std::string const& str);
00190 
00191 // ######################################################################
00192 /* So things look consistent in everyone's emacs... */
00193 /* Local Variables: */
00194 /* mode: c++ */
00195 /* indent-tabs-mode: nil */
00196 /* End: */
00197 
00198 #endif // UTIL_STRINGUTIL_H_DEFINED