root/trunk/whisperlib/common/base/strutil.h

Revision 7, 13.1 kB (checked in by whispercastorg, 2 years ago)

version 0.2.0

Line 
1 // Copyright (c) 2009, Whispersoft s.r.l.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Whispersoft s.r.l. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Author: Cosmin Tudorache & Catalin Popescu
31
32 //
33 // We have here a bunch of utilities for manipulating strings
34 //
35
36 #ifndef __COMMON_BASE_STRUTIL_H__
37 #define __COMMON_BASE_STRUTIL_H__
38
39 #include <strings.h>
40 #include <string.h>
41 #include <iostream>
42 #include <sstream>
43 #include <string>
44 #include <vector>
45 #include <set>
46 #include <map>
47 #include <algorithm>
48 #include <whisperlib/common/base/types.h>
49 #include <whisperlib/common/base/log.h>
50
51 #include <whisperlib/common/base/third-party/string_util.h>
52
53 namespace strutil {
54
55 // Test string equality.
56 bool StrEql(const char* str1, const char* str2);
57 bool StrEql(const string& str1, const string& str2);
58
59 // Test string equality. Ignore case.
60 bool StrIEql(const char* str1, const char* str2);
61 bool StrIEql(const string& str1, const string& str2);
62
63 // Tests if this string starts with the specified prefix.
64 bool StrPrefix(const char* str, const char* prefix);
65 bool StrStartsWith(const char* str, const char* prefix);
66 bool StrStartsWith(const string& str, const string& prefix);
67
68 // Tests if this string starts with the specified prefix - ignoring case.
69 bool StrCasePrefix(const char* str, const char* prefix);
70 bool StrIStartsWith(const char* str, const char* prefix);
71 bool StrIStartsWith(const string& str, const string& prefix);
72
73 // Tests if string ends with the specified suffix.
74 bool StrSuffix(const char* str, const char* suffix);
75 bool StrSuffix(const string& str, const string& suffix);
76 bool StrEndsWith(const char* str, const char* suffix);
77 bool StrEndsWith(const string& str, const string& suffix);
78
79 // Passes over the spaces (and tabs) of the given string
80 const char* StrFrontTrim(const char* str);
81 // Passes over the spaces (and tabs) of the given string
82 string StrFrontTrim(const string& str);
83
84 // Removes the front and back spaces (and tabs) of the given string
85 string StrTrim(const string& str);
86
87 // Removes the front and back characters from chars_to_trim for the
88 // given string str
89 string StrTrimChars(const string& str, const char* chars_to_trim);
90
91 // Removes all spaces from the given string
92 string StrTrimCompress(const string& str);
93
// Case-insensitive equality test for two strings.
// Returns true only when s1 and s2 have the same length and strncasecmp()
// finds no difference over that length.
inline bool StrCaseEqual(const std::string& s1, const std::string& s2) {
  if ( s1.size() != s2.size() ) {
    return false;  // strings of different lengths can never be equal
  }
  return strncasecmp(s1.c_str(), s2.c_str(), s1.size()) == 0;
}
99
100 // Moves data inside buffer from buf + shift_size to the beginning
101 // of the buffer and fills w/ fill_value afterwards
102 void ShiftLeftBuffer(void* buf,
103                      size_t buf_size,
104                      size_t shift_size,
105                      int fill_value);
106
107 // Given an array of strings it joins them using the provided glue string
108 string JoinStrings(const char* pieces[], size_t size, const char* glue);
109
110 // Given an array of strings it joins them using the provided glue string
111 string JoinStrings(const vector<string>& pieces, const char* glue);
112
113 // Takes a string and a separator, and splits the string into its components
114 // separated by the separator (which is in none of them);
115 void SplitString(const string& s,
116                  const string& separator,
117                  vector<string>* output);
118
// Splits a string in two - before and after the first occurrence of the
// separator. The separator itself appears in neither half.
//   s:         NUL-terminated input string (must not be NULL).
//   separator: the character to split on.
// Returns the pair (text before the separator, text after the separator).
// If the separator is not found, the first string is the whole input and
// the second one is empty.
// A '\0' separator is treated as "not found": strchr() would match the
// string terminator, and the second half would then be read from beyond
// the end of the buffer.
inline std::pair<std::string, std::string> SplitFirst(const char* s,
                                                      char separator) {
  const char* sep_pos = NULL;
  if ( separator != '\0' ) {
    sep_pos = strchr(s, separator);
  }
  if ( sep_pos == NULL ) {
    return std::make_pair(std::string(s), std::string());
  }
  return std::make_pair(std::string(s, sep_pos - s),
                        std::string(sep_pos + 1));
}
129
130 // Splits a string that contains a list of pairs of elements:
131 // <elem1_1>sep2<elem1_2>sep1<elem2_1>sep2<elem2_2>sep1...
132 //   ...sep1<elemN_1>sep2<elemN_2>
133 //
134 void SplitPairs(const string& s,
135                 const string& elements_separator,  // sep1
136                 const string& pair_separator,      // sep2
137                 vector< pair<string, string> >* output);
138
139 // Splits a string on separators outside the brackets
140 // e.g.
141 // SplitBracketedString("a(b, c, d(3)), d(), e(d(3))", ',', '(', ')')
142 // will generate:
143 // ["a(b, c, d(3))", " d()", " e(d(3))"]
144 // Returns false on error (misplaced brackets, etc.).
145 bool SplitBracketedString(const char* s,
146                           const char separator,
147                           const char open_bracket,
148                           const char close_bracket,
149                           vector<string>* output);
150 // Removes outermost brackets from a string
151 // parsed by SplitBracketedString
152 string RemoveOutermostBrackets(const string& s,
153                                const char open_bracket,
154                                const char close_bracket);
155
156 // Directory name processing helpers
157 const char* Basename(const char* filename);
158 string Basename(const string& filename);
159 string Dirname(const string& filename);
160 string CutExtension(const string& filename);
161 string Extension(const string& filename);
162
163 /// TODO(cpopescu): use a path separator const everywhere.
164
165 // normalizes a file path (collapses ../, ./ // etc)  but leaves
166 // all the prefix '/'
167 string NormalizePath(const string& path);
168
169 // Joins to paths together, cannonically
170 inline string JoinPaths(const string& path1, const string& path2) {
171   if ( path1.empty() ) return NormalizePath(path2);
172   if ( path2.empty() ) return NormalizePath(path1);
173   if ( path1 == "/" ) {
174     return NormalizePath(path1 + path2);
175   }
176   return NormalizePath(path1 + "/" + path2);
177 }
178
179 // Similar to NormalizePath, but collapses the prefix '/'
180 string NormalizeUrlPath(const string& path);
181
182
183 // Transforms a data buffer to a printable string (a'la od)
184 string PrintableDataBuffer(const void* buffer, size_t size);
185 // Similar to PrintableDataBuffer but returns only the HEXA printing.
186 string PrintableDataBufferHexa(const void* buffer, size_t size);
187
188 // Some useful functions for formatted printing in a string ..
189 string StringPrintf(const char* format, ...);
190 string StringPrintf(const char* format, va_list args);
191 void StringAppendf(string* s, const char* format, va_list args);
192
193
// Function object wrapping ::toupper, so that it can be handed to
// algorithms such as transform().
struct toupper_s {
  // Returns the upper-case equivalent of character c.
  // The argument is first reduced to unsigned char: ::toupper has undefined
  // behavior for values that are neither EOF nor representable as
  // unsigned char, which is what a negative (high-bit) plain char turns
  // into when it is promoted to int.
  int operator()(int c) const {
    return ::toupper(static_cast<unsigned char>(c));
  }
};
// Function object wrapping ::tolower, so that it can be handed to
// algorithms such as transform().
struct tolower_s {
  // Returns the lower-case equivalent of character c.
  // The argument is first reduced to unsigned char: ::tolower has undefined
  // behavior for values that are neither EOF nor representable as
  // unsigned char, which is what a negative (high-bit) plain char turns
  // into when it is promoted to int.
  int operator()(int c) const {
    return ::tolower(static_cast<unsigned char>(c));
  }
};
204
205 inline const string& StrToUpper(string& s) {
206   transform(s.begin(), s.end(), s.begin(), toupper_s());
207   return s;
208 }
209
210 inline const string& StrToLower(string& s) {
211   transform(s.begin(), s.end(), s.begin(), tolower_s());
212   return s;
213 }
214
// Returns the textual form of an object, i.e. whatever its operator<<
// writes into an output stream. T must be streamable and copyable (the
// argument is taken by value).
template <class T>
std::string StringOf(T object) {
  std::ostringstream formatted;
  formatted << object;
  return formatted.str();
}
222
// Renders a map as "map #<size>{[<k1>, <v1>], [<k2>, <v2>], ...}".
// Keys and values are written with their operator<<, in the iteration
// order of the map.
template <typename K, typename V>
std::string ToString(const std::map<K, V>& m) {
  std::ostringstream out;
  out << "map #" << m.size() << "{";
  for ( typename std::map<K, V>::const_iterator it = m.begin();
        it != m.end(); ++it ) {
    if ( it != m.begin() ) {
      out << ", ";  // separator goes between entries only
    }
    out << "[" << it->first << ", " << it->second << "]";
  }
  out << "}";
  return out.str();
}
// Renders a set as "set #<size>{<e1>, <e2>, ...}".
// Elements are written with their operator<<, in the iteration order of
// the set.
template <typename T>
std::string ToString(const std::set<T>& v) {
  std::ostringstream out;
  out << "set #" << v.size() << "{";
  typename std::set<T>::const_iterator it = v.begin();
  while ( it != v.end() ) {
    if ( it != v.begin() ) {
      out << ", ";  // separator goes between elements only
    }
    out << *it;
    ++it;
  }
  out << "}";
  return out.str();
}
// Renders a vector as "vector #<size>{<e1>, <e2>, ...}".
// Elements are written with their operator<<, in index order.
template <typename T>
std::string ToString(const std::vector<T>& v) {
  std::ostringstream out;
  out << "vector #" << v.size() << "{";
  for ( size_t i = 0; i < v.size(); ++i ) {
    if ( i > 0 ) {
      out << ", ";  // separator goes between elements only
    }
    out << v[i];
  }
  out << "}";
  return out.str();
}
269
// Returns the binary representation of x, most significant bit first,
// with a single space between consecutive groups of 8 bits.
// Works with integral types only: int8, uint8, int16, uint16, ...
// e.g. (uint8)  0xa3   => "10100011"
//      (uint16) 0xa305 => "10100011 00000101"
//
// The result always contains sizeof(T) * 8 digits, so every bit of a
// multi-byte value is printed, including the first bit of each byte
// after the first.
template <typename T>
std::string ToBinary(T x) {
  const size_t bit_count = sizeof(T) * 8;
  std::string txt;
  // All the digits plus at most one separator per byte.
  txt.reserve(bit_count + sizeof(T));
  for ( size_t i = 0; i < bit_count; ++i ) {
    if ( i > 0 && i % 8 == 0 ) {
      txt += ' ';  // byte boundary: separator only, the bit is still emitted
    }
    // Bit i, counted from the most significant end.
    txt += (((x >> (bit_count - 1 - i)) & 1) == 1) ? '1' : '0';
  }
  return txt;
}
285
286 // return a string s such that:
287 //      s > prefix
288 //  and
289 //     there does not exist s' such that:
290 //        s > s' > prefix
291 //
292 string GetNextInLexicographicOrder(const string& prefix);
293
294 // Utility for finding bounds in a map keyed by strings, givven a key prefix
295 template<class C>
296 void GetBounds(const string& prefix,
297                map<string, C>* m,
298                typename map<string, C>::iterator* begin,
299                typename map<string, C>::iterator* end) {
300   if ( prefix.empty() ) {
301     *begin = m->begin();
302   } else {
303     *begin = m->lower_bound(prefix);
304   }
305   const string upper_bound = strutil::GetNextInLexicographicOrder(prefix);
306   if ( upper_bound.empty() ) {
307     *end = m->end();
308   } else {
309     *end = m->upper_bound(upper_bound);
310   }
311 }
312 template<class C>
313 void GetBounds(const string& prefix,
314                const map<string, C>& m,
315                typename map<string, C>::const_iterator* begin,
316                typename map<string, C>::const_iterator* end) {
317   if ( prefix.empty() ) {
318     *begin = m.begin();
319   } else {
320     *begin = m.lower_bound(prefix);
321   }
322   const string upper_bound = strutil::GetNextInLexicographicOrder(prefix);
323   if ( upper_bound.empty() ) {
324     *end = m.end();
325   } else {
326     *end = m.upper_bound(upper_bound);
327   }
328 }
329
330 //  Replace characters "szCharsToEscape" in "text" with escape sequences
331 // marked by "escape". The escaped chars are replaced by "escape" character
332 // followed by their ASCII code as 2 digit text.
333 //
334 //  The escape char is replaced by "escape""escape".
335 // e.g.
336 //  StrEscape("a,., b,c", '#', ",.") => "a#44#46#44 b#44c"
337 //  StrEscape("a,.# b,c", '#', ",.") => "a#44#46## b#44c"
338 string StrEscape(const char* text, char escape,
339                       const char* chars_to_escape);
340 string StrEscape(const string& text,
341                  char escape,
342                  const char* chars_to_escape);
343 string StrNEscape(const char* text, size_t size, char escape,
344                   const char* chars_to_escape);
345
346
347 //  The reverse of StrEscape.
348 string StrUnescape(const char* text, char escape);
349 string StrUnescape(const string& text, char escape);
350
351 // Escapes a string for JSON encoding
352 string JsonStrEscape(const char* text, size_t size);
353 inline string JsonStrEscape(const string& text) {
354   return JsonStrEscape(text.c_str(), text.size());
355 }
356 string JsonStrUnescape(const char* text, size_t size);
357 inline string JsonStrUnescape(const string& text) {
358   return JsonStrUnescape(text.c_str(), text.length());
359 }
360
// Returns true if the NUL-terminated string s is a valid identifier:
//  - the first character must be a letter or a digit (a..z A..Z 0..9);
//    an underscore is NOT accepted in the first position;
//  - every following character must be a letter, a digit or '_'.
// The empty string is not a valid identifier.
// TODO(cosmin): clear this up!
//               Can it start with a digit?
inline bool IsValidIdentifier(const char* s) {
  const char first = *s;
  const bool first_ok = (first >= '0' && first <= '9') ||
                        (first >= 'A' && first <= 'Z') ||
                        (first >= 'a' && first <= 'z');
  if ( !first_ok ) {
    return false;  // also covers the empty string (first == '\0')
  }
  for ( const char* p = s + 1; *p != '\0'; ++p ) {
    const char c = *p;
    const bool char_ok = (c >= '0' && c <= '9') ||
                         (c >= 'A' && c <= 'Z') ||
                         (c >= 'a' && c <= 'z') ||
                         c == '_';
    if ( !char_ok ) {
      return false;
    }
  }
  return true;
}
381
382 // Replaces named variables in the given string with corresponding one
383 // found in the 'vars' map, much in the vein of python formatters.
384 //
385 // E.g.
386 // string s("We found user ${User} who wants to \${10 access ${Resource}.")
387 // map<string, string> m;
388 // m["User"] = "john";
389 // m["Resource"] = "disk";
390 // cout << strutil::StrMapFormat(s, m, "${", "}", '\\') << endl;
391 //
392 // Would result in:
393 // We found user john who wants to ${10 access disk.
394 // escape_char escapes first chars in both arg_begin and arg_end.
395 //
396 string StrMapFormat(const char* s,
397                     const map<string, string>& m,
398                     const char* arg_begin = "${",
399                     const char* arg_end = "}",
400                     char escape_char = '\\');
401
402 }
403
404 # endif  // __COMMON_BASE_STRUTIL_H__
Note: See TracBrowser for help on using the browser.